Example #1
class WMStatsTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        self.testInit.setupCouch('wmstats_t', *self.couchApps)
        self.wmstatsWriter = WMStatsWriter(self.testInit.couchUrl, 'wmstats_t')
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        # exercise the WMStatsWriter APIs
        schema = generate_reqmgr_schema()
        self.assertEquals(self.wmstatsWriter.insertRequest(schema[0]), 'OK', 'insert fail')
        self.assertEquals(self.wmstatsWriter.updateRequestStatus(schema[0]['RequestName'], "failed"), 'OK', 'update fail')
        self.assertEquals(self.wmstatsWriter.updateRequestStatus("not_exist_schema", "assigned"),
                          'ERROR: request not found - not_exist_schema')
        self.assertEquals(self.wmstatsWriter.updateTeam(schema[0]['RequestName'], 'teamA'), 'OK', 'update fail')
        self.assertEquals(self.wmstatsWriter.updateTeam("not_exist_schema", 'teamA'),
                          'ERROR: request not found - not_exist_schema')
        totalStats = {'total_jobs': 100, 'input_events': 1000, 'input_lumis': 1234, 'input_num_files': 5}
        self.assertEquals(self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'], totalStats), 'INSERTED', 'update fail')
        self.assertEquals(self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'], totalStats), 'UPDATED', 'update fail')
        self.assertEquals(self.wmstatsWriter.insertTotalStats("not_exist_schema", totalStats),
                          'ERROR: request not found - not_exist_schema')
        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec1), 'OK', 'update fail')
        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec2),
                          'ERROR: request not found - not_exist_schema')
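The assertions above lean on WMStatsWriter reporting success and failure through plain status strings ('OK', 'INSERTED', 'UPDATED', 'ERROR: request not found - ...') rather than exceptions. A minimal sketch of how a caller might branch on that contract; the helper name safeStatusUpdate is hypothetical and only the return-code convention shown in the test is assumed:

def safeStatusUpdate(writer, requestName, newStatus):
    # 'writer' is assumed to be a WMStatsWriter exposing the
    # updateRequestStatus(name, status) -> status-string API exercised above.
    result = writer.updateRequestStatus(requestName, newStatus)
    if result == 'OK':
        print("%s moved to %s" % (requestName, newStatus))
    elif result.startswith('ERROR'):
        print("status update failed: %s" % result)
    return result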
Example #2
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority* : optional integer priority
    
    When changing the request state (on the assignment page) it is also
    possible to change the priority in one go, hence this argument.

    """
    # MySQL/Oracle
    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)

    # CouchDB
    # have to first get information where the request Couch document is,
    # extracting the information from reqmgr_request.workflow table field
    reqData = factory(classname="Request.Get").execute(reqId)
    # this would be something like this:
    # http://localhost:5984/reqmgr_workload_cache/maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    wfUrl = reqData['workflow']
    # cut off /maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    couchUrl = wfUrl.replace('/' + requestName + "/spec", '')
    couchDbName = couchUrl[couchUrl.rfind('/') + 1:]
    # cut off database name from the URL
    url = couchUrl.replace('/' + couchDbName, '')
    couchDb = Database(couchDbName, url)
    fields = {"RequestStatus": newState}
    couchDb.updateDocument(requestName,
                           "ReqMgr",
                           "updaterequest",
                           fields=fields,
                           useBody=True)

    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)
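The string surgery on wfUrl above is easy to misread, so here is a self-contained walkthrough using the sample URL from the comment (pure string manipulation, no CouchDB connection involved):

requestName = 'maxa_RequestString-OVERRIDE-ME_130306_205649_8066'
wfUrl = ('http://localhost:5984/reqmgr_workload_cache/'
         'maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec')
couchUrl = wfUrl.replace('/' + requestName + '/spec', '')
assert couchUrl == 'http://localhost:5984/reqmgr_workload_cache'
couchDbName = couchUrl[couchUrl.rfind('/') + 1:]
assert couchDbName == 'reqmgr_workload_cache'
url = couchUrl.replace('/' + couchDbName, '')
assert url == 'http://localhost:5984'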
Example #3
def changeRequestStatus(requestName, newState, priority = None, wmstatUrl = None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority* : optional integer priority

    """
    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)

    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)
    return
Example #4
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority* : optional integer priority
    
    When changing the request state (on the assignment page) it is also
    possible to change the priority in one go, hence this argument.

    """
    # MySQL/Oracle
    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)

    # CouchDB
    # have to first get information where the request Couch document is,
    # extracting the information from reqmgr_request.workflow table field
    reqData = factory(classname="Request.Get").execute(reqId)
    # this would be something like this:
    # http://localhost:5984/reqmgr_workload_cache/maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    wfUrl = reqData["workflow"]
    # cut off /maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    couchUrl = wfUrl.replace("/" + requestName + "/spec", "")
    couchDbName = couchUrl[couchUrl.rfind("/") + 1 :]
    # cut off database name from the URL
    url = couchUrl.replace("/" + couchDbName, "")
    couchDb = Database(couchDbName, url)
    fields = {"RequestStatus": newState}
    couchDb.updateDocument(requestName, "ReqMgr", "updaterequest", fields=fields, useBody=True)

    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)
Example #5
def changeRequestStatus(requestName, newState, priority = None, wmstatUrl = None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority* : optional integer priority
    
    When changing the request state (on the assignment page) it is also
    possible to change the priority in one go, hence this argument.

    """
    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)

    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)
    return
Example #6
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority* : optional integer priority
    
    When changing the request state (on the assignment page) it is also
    possible to change the priority in one go, hence this argument.

    """
    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)

    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)
    return
Example #7
def updateRequestStatus(couchURL, requestList, status):
    ww = WMStatsWriter(couchURL)
    for request in requestList:
        ww.updateRequestStatus(request, status)
        print("%s is udated to %s" % (request, status))
Example #8
class Tier0PluginTest(unittest.TestCase):


    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.wmstatsCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.wmstatsCouchDB, 'WMStats')
        self.testDir = self.testInit.generateWorkDir()

        self.wmstatsWriter = WMStatsWriter(os.environ['COUCHURL'], self.wmstatsCouchDB)

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge' : [],
                         'Processing Done' : []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id' : workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name = 'TestStreamerFileset')
        topFileset.create()

        options = {'spec' : specPath, 'owner' : 'ItsAMeMario',
                   'name' : workflowName, 'wfType' : 'tier0'}
        topLevelWorkflow = Workflow(task = '/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task = '/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name = 'TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task = '/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name = 'TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an express-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM', 'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO', 'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT', 'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge' : [],
                         'Harvesting' : [],
                         'Processing Done' : []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id' : workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name = 'TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec' : specPath, 'owner' : 'ItsAMeMario',
                   'name' : workflowName, 'wfType' : 'tier0'}
        topLevelWorkflow = Workflow(task = '/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in filter(lambda x : not x.count('CleanupUnmerged'), secondLevelTasks):
            secondLevelWorkflow = Workflow(task = '/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task = '/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        workload = promptrecoWorkload(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath = self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim' : [],
                         'Merge' : [],
                         'Harvesting' : [],
                         'Processing Done' : []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id' : workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name = workflowName, task = alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name = '/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name = workflowName, task = mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name = '%s/unmerged-%s' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name = '%s/unmerged-write_%s' % (topLevelTask, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name = workflowName, task = harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name = '/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-Merged')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod = 'markFinished', transitionTrigger = True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition at each step, using the given
        transition method and trigger. It verifies that the request document in WMStats
        moves through the expected transitions.
        """

        for idx in range(0, len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.wmstatsWriter, self.wmstatsWriter)
                currentStateWorkflows = self.wmstatsWriter.workflowsByStatus([currentState], stale = False)
                nextStateWorkflows = self.wmstatsWriter.workflowsByStatus([nextState], stale = False)
                self.assertEqual(len(currentStateWorkflows), 1, 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0, 'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.wmstatsWriter, self.wmstatsWriter)
                currentStateWorkflows = self.wmstatsWriter.workflowsByStatus([currentState], stale = False)
                nextStateWorkflows = self.wmstatsWriter.workflowsByStatus([nextState], stale = False)
                self.assertEqual(len(currentStateWorkflows), 0, 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1, 'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Setup an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Setup an environment with an Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Setup an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
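The index arithmetic in verifyStateTransitions above packs two checks per state: an even idx finishes all but one of the transition objects (the workflow must not move yet), an odd idx finishes the last one (the workflow must move). A standalone sketch of that mapping, assuming the three-state list from the Express test:

orderedStates = ['Merge', 'Harvesting', 'Processing Done']
for idx in range(len(orderedStates) * 2):
    nextState = orderedStates[idx // 2]
    currentState = 'Closed' if idx // 2 == 0 else orderedStates[idx // 2 - 1]
    phase = 'all but last finished' if idx % 2 == 0 else 'last one finished'
    print("%d: %s -> %s (%s)" % (idx, currentState, nextState, phase))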
Example #9
    parser = OptionParser()
    parser.set_usage("wmstats-request-status-change [agent_url:port]")

    parser.add_option("-r", "--request", dest="request", help="resquest name")

    parser.add_option("-s",
                      "--status",
                      dest="newstatus",
                      help="set to new status")

    (options, args) = parser.parse_args()

    if not options.request:
        print "request name needs to be set"
        sys.exit(1)

    if not options.newstatus:
        print "new status needs to be set"
        sys.exit(1)

    answer = raw_input("%s change to %s in wmstats db (yes, no)?" %
                       (options.request, options.newstatus))
    if not answer.lower() == "yes":
        print "Canceled"
        sys.exit(1)

    report = wmstats.updateRequestStatus(options.request, options.newstatus)

    print(report)
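For reference, the fragment above would be invoked roughly as wmstats-request-status-change -r <request_name> -s <new_status>; the wmstats object it calls at the end is not defined in this excerpt, so it is presumably constructed in the part of the script that was cut off.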
Example #10
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch documents created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config
        
    def setup(self, parameters):
        """
        Called at startup
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck   = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(self.config.TaskArchiver.centralWMStatsURL)
        
        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)
        else:
            # Tier0 case
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB
        
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb  = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)
            logging.info("getting complete and announced requests")
            
            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(self.deletableStates)
            
            logging.info("Ready to delete %s" % deletableWorkflows)     
            for workflowName in deletableWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "normal-archived")
                    logging.info("status updated to normal-archived %s" % workflowName)
            
            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(["aborted-completed"])
            logging.info("Ready to delete aborted %s" % abortedWorkflows)
            for workflowName in abortedWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "aborted-archived")
                    logging.info("status updated to aborted-archived %s" % workflowName)

            # TODO: the following code is temporary - remove after production archived data is cleaned
            removableWorkflows = self.centralCouchDBReader.workflowsByStatus(["archived"])
            
            logging.info("Ready to delete %s from wmagent_summary" % removableWorkflows)     
            for workflowName in removableWorkflows:
                logging.info("Deleting %s from WMAgent Summary Couch" % workflowName)
                report = self.deleteWorkflowFromJobCouch(workflowName, "WMStats")
                logging.info("%s docs deleted from wmagent_summary" % report)
                # only update the status when the delete is successful
                # TODO: need to handle the case when there are multiple agents running the same request.
                if report["status"] == "ok":
                    self.centralCouchDBWriter.updateRequestStatus(workflowName, "normal-archived")
                    logging.info("status updated to normal-archived from archived (this is temp solution for production) %s" % workflowName)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
Example #11
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch documents created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config
        
    def setup(self, parameters):
        """
        Called at startup
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck   = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBWriter = WMStatsWriter(self.config.TaskArchiver.centralWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(self.config.TaskArchiver.centralWMStatsURL)
        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb  = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)
            logging.info("getting complete and announced requests")
            
            # TODO: define which statuses are deletable. Also add the code to delete the summary
            # document, the request summary and the job summary.
            if self.useReqMgrForCompletionCheck:
                deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(["announced"])
            else:
                deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(["completed"])
            
            logging.info("Ready to delete %s" % deletableWorkflows)     
            for workflowName in deletableWorkflows:
                logging.info("Deleting %s from JobCouch" % workflowName)
                
                report = self.deleteWorkflowFromJobCouch(workflowName, "JobDump")
                logging.info("%s docs deleted from JobDump" % report)
                report = self.deleteWorkflowFromJobCouch(workflowName, "FWJRDump")
                logging.info("%s docs deleted from FWJRDump" % report)
                
                self.centralCouchDBWriter.updateRequestStatus(workflowName, "archived")
                logging.info("status updated to archived %s" % workflowName)
                
        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
Example #12
class WMStatsTest(unittest.TestCase):
    """
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["WMStats"]
        self.testInit = TestInitCouchApp('WorkQueueServiceTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        self.testInit.setupCouch('wmstats_t', *self.couchApps)
        self.wmstatsWriter = WMStatsWriter(self.testInit.couchUrl, 'wmstats_t')
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testWMStatsWriter(self):
        # exercise the WMStatsWriter APIs
        schema = generate_reqmgr_schema()
        self.assertEquals(self.wmstatsWriter.insertRequest(schema[0]), 'OK',
                          'insert fail')
        self.assertEquals(
            self.wmstatsWriter.updateRequestStatus(schema[0]['RequestName'],
                                                   "failed"), 'OK',
            'update fail')
        self.assertEquals(
            self.wmstatsWriter.updateRequestStatus("not_exist_schema",
                                                   "assigned"),
            'ERROR: request not found - not_exist_schema')
        self.assertEquals(
            self.wmstatsWriter.updateTeam(schema[0]['RequestName'], 'teamA'),
            'OK', 'update fail')
        self.assertEquals(
            self.wmstatsWriter.updateTeam("not_exist_schema", 'teamA'),
            'ERROR: request not found - not_exist_schema')
        totalStats = {
            'total_jobs': 100,
            'input_events': 1000,
            'input_lumis': 1234,
            'input_num_file': 5
        }
        self.assertEquals(
            self.wmstatsWriter.insertTotalStats(schema[0]['RequestName'],
                                                totalStats), 'OK',
            'update fail')
        self.assertEquals(
            self.wmstatsWriter.insertTotalStats("not_exist_schema",
                                                totalStats),
            'ERROR: request not found - not_exist_schema')
        spec1 = newWorkload(schema[0]['RequestName'])
        production = spec1.newTask("Production")
        production.setTaskType("Merge")
        production.setSiteWhitelist(['TEST_SITE'])
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec1), 'OK',
                          'update fail')
        spec2 = newWorkload("not_exist_schema")
        production = spec2.newTask("Production")
        production.setTaskType("Merge")
        self.assertEquals(self.wmstatsWriter.updateFromWMSpec(spec2),
                          'ERROR: request not found - not_exist_schema')
Example #13
class Tier0FeederPoller(BaseWorkerThread):

    def __init__(self, config):
        """
        _init_

        """
        BaseWorkerThread.__init__(self)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "T0.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)

        self.tier0ConfigFile = config.Tier0Feeder.tier0ConfigFile
        self.specDirectory = config.Tier0Feeder.specDirectory
        self.dropboxuser = config.Tier0Feeder.dropboxuser
        self.dropboxpass = config.Tier0Feeder.dropboxpass

        self.transferSystemBaseDir = getattr(config.Tier0Feeder, "transferSystemBaseDir", None)
        if self.transferSystemBaseDir is not None:
            if not os.path.exists(self.transferSystemBaseDir):
                self.transferSystemBaseDir = None

        self.dqmUploadProxy = config.WMBSService.proxy

        self.localSummaryCouchDB = WMStatsWriter(config.AnalyticsDataCollector.localWMStatsURL)

        hltConfConnectUrl = config.HLTConfDatabase.connectUrl
        dbFactoryHltConf = DBFactory(logging, dburl = hltConfConnectUrl, options = {})
        dbInterfaceHltConf = dbFactoryHltConf.connect()
        daoFactoryHltConf = DAOFactory(package = "T0.WMBS",
                                       logger = logging,
                                       dbinterface = dbInterfaceHltConf)
        self.getHLTConfigDAO = daoFactoryHltConf(classname = "RunConfig.GetHLTConfig")

        storageManagerConnectUrl = config.StorageManagerDatabase.connectUrl
        dbFactoryStorageManager = DBFactory(logging, dburl = storageManagerConnectUrl, options = {})
        self.dbInterfaceStorageManager = dbFactoryStorageManager.connect()

        self.getExpressReadyRunsDAO = None
        if hasattr(config, "PopConLogDatabase"):
            popConLogConnectUrl = getattr(config.PopConLogDatabase, "connectUrl", None)
            if popConLogConnectUrl is not None:
                dbFactoryPopConLog = DBFactory(logging, dburl = popConLogConnectUrl, options = {})
                dbInterfacePopConLog = dbFactoryPopConLog.connect()
                daoFactoryPopConLog = DAOFactory(package = "T0.WMBS",
                                                 logger = logging,
                                                 dbinterface = dbInterfacePopConLog)
                self.getExpressReadyRunsDAO = daoFactoryPopConLog(classname = "Tier0Feeder.GetExpressReadyRuns")

        return

    def algorithm(self, parameters = None):
        """
        _algorithm_

        """
        logging.debug("Running Tier0Feeder algorithm...")
        myThread = threading.currentThread()

        findNewRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRuns")
        findNewRunStreamsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewRunStreams")
        findNewExpressRunsDAO = self.daoFactory(classname = "Tier0Feeder.FindNewExpressRuns")
        releaseExpressDAO = self.daoFactory(classname = "Tier0Feeder.ReleaseExpress")
        feedStreamersDAO = self.daoFactory(classname = "Tier0Feeder.FeedStreamers")
        markRepackInjectedDAO = self.daoFactory(classname = "Tier0Feeder.MarkRepackInjected")

        tier0Config = None
        try:
            tier0Config = loadConfigurationFile(self.tier0ConfigFile)
        except:
            # usually happens when there are syntax errors in the configuration
            logging.exception("Cannot load Tier0 configuration file, not configuring new runs and run/streams")

        # only configure new runs and run/streams if we have a valid Tier0 configuration
        if tier0Config is not None:

            #
            # find new runs, setup global run settings and stream/dataset/trigger mapping
            #
            runHltkeys = findNewRunsDAO.execute(transaction = False)
            for run, hltkey in sorted(runHltkeys.items()):

                hltConfig = None

                # local runs have no hltkey and are configured differently
                if hltkey is not None:

                    # retrieve HLT configuration and make sure it's usable
                    try:
                        hltConfig = self.getHLTConfigDAO.execute(hltkey, transaction = False)
                        if hltConfig['process'] is None or len(hltConfig['mapping']) == 0:
                            raise RuntimeError("HLTConfDB query returned no process or mapping")
                    except:
                        logging.exception("Can't retrieve hltkey %s for run %d" % (hltkey, run))
                        continue

                try:
                    RunConfigAPI.configureRun(tier0Config, run, hltConfig)
                except:
                    logging.exception("Can't configure for run %d" % (run))

            #
            # find unconfigured run/stream with data
            # populate RunConfig, setup workflows/filesets/subscriptions
            # 
            runStreams = findNewRunStreamsDAO.execute(transaction = False)
            for run in sorted(runStreams.keys()):
                for stream in sorted(runStreams[run]):
                    try:
                        RunConfigAPI.configureRunStream(tier0Config,
                                                        run, stream,
                                                        self.specDirectory,
                                                        self.dqmUploadProxy)
                    except:
                        logging.exception("Can't configure for run %d and stream %s" % (run, stream))

        #
        # end runs which are active and have ended according to the EoR StorageManager records
        #
        RunLumiCloseoutAPI.endRuns(self.dbInterfaceStorageManager)

        #
        # release runs for Express
        #
        runs = findNewExpressRunsDAO.execute(transaction = False)

        if len(runs) > 0:

            binds = []
            for run in runs:
                binds.append( { 'RUN' : run } )

            if self.getExpressReadyRunsDAO is not None:
                runs = self.getExpressReadyRunsDAO.execute(binds = binds, transaction = False)

            if len(runs) > 0:

                binds = []
                for run in runs:
                    binds.append( { 'RUN' : run } )

                releaseExpressDAO.execute(binds = binds, transaction = False)

        #
        # release runs for PromptReco
        #
        RunConfigAPI.releasePromptReco(tier0Config,
                                       self.specDirectory,
                                       self.dqmUploadProxy)

        #
        # check if all datasets for a stream had their PromptReco released
        # then mark the repack workflow as injected (if we don't wait, the
        # task archiver will cleanup too early)
        #
        markRepackInjectedDAO.execute(transaction = False)

        #
        # close stream/lumis for run/streams that are active (fileset exists and open)
        #
        RunLumiCloseoutAPI.closeLumiSections(self.dbInterfaceStorageManager)

        #
        # feed new data into existing filesets
        #
        try:
            myThread.transaction.begin()
            feedStreamersDAO.execute(conn = myThread.transaction.conn, transaction = True)
        except:
            logging.exception("Can't feed data, bailing out...")
            raise
        else:
            myThread.transaction.commit()

        #
        # run ended and run/stream fileset open
        #    => check for complete lumi_closed record, all lumis finally closed and all data fed
        #          => if all conditions satisfied, close the run/stream fileset
        #
        RunLumiCloseoutAPI.closeRunStreamFilesets()

        #
        # check and delete active split lumis
        #
        RunLumiCloseoutAPI.checkActiveSplitLumis()

        #
        # insert workflows into CouchDB for monitoring
        #
        self.feedCouchMonitoring()

        #
        # Update Couch when Repack and Express have closed input filesets (analog to old T0 closeout)
        #
        self.closeOutRealTimeWorkflows()

        #
        # send repacked notifications to StorageManager
        #
        if self.transferSystemBaseDir is not None:
            self.notifyStorageManager()

        #
        # upload PCL conditions to DropBox
        #
        ConditionUploadAPI.uploadConditions(self.dropboxuser, self.dropboxpass)

        return

    def feedCouchMonitoring(self):
        """
        _feedCouchMonitoring_

        check for workflows that haven't been uploaded to Couch for monitoring yet

        """
        getStreamerWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetStreamerWorkflowsForMonitoring")
        getPromptRecoWorkflowsForMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.GetPromptRecoWorkflowsForMonitoring")
        markTrackedWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkTrackedWorkflowMonitoring")
        workflows = getStreamerWorkflowsForMonitoringDAO.execute()
        workflows += getPromptRecoWorkflowsForMonitoringDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            logging.debug(" Going to publish %d workflows" % len(workflows))
            for (workflowId, run, workflowName) in workflows:
                logging.info(" Publishing workflow %s to monitoring" % workflowName)
                doc = {}
                doc["_id"] =  workflowName
                doc["workflow"] =   workflowName
                doc["type"]     =   "tier0_request"
                doc["run"]      =   run
                response = self.localSummaryCouchDB.insertGenericRequest(doc)
                if response == "OK" or "EXISTS":
                    logging.info(" Successfully uploaded request %s" % workflowName)
                    # Here we have to trust the insert, if it doesn't happen will be easy to spot on the logs
                    markTrackedWorkflowMonitoringDAO.execute(workflowId)

        return

    def closeOutRealTimeWorkflows(self):
        """
        _closeOutRealTimeWorkflows_

        Updates couch with the closeout status of Repack and Express
        PromptReco should be closed out automatically

        """
        getNotClosedOutWorkflowsDAO = self.daoFactory(classname = "Tier0Feeder.GetNotClosedOutWorkflows")
        workflows = getNotClosedOutWorkflowsDAO.execute()

        if len(workflows) == 0:
            logging.debug("No workflows to publish to couch monitoring, doing nothing")

        if workflows:
            for workflow in workflows:
                (workflowId, filesetId, filesetOpen, workflowName) = workflow
                if 'PromptReco' in workflowName:
                    logging.debug("Closing out instantaneously PromptReco Workflow %s" % workflowName)
                    self.updateClosedState(workflowName, workflowId)
                else:
                    # Check if fileset (which you already know) is closed or not
                    # FIXME: No better way to do it? what comes from the DAO is a string, casting bool or int doesn't help much.
                    # Works like that :
                    if filesetOpen == '0':
                        self.updateClosedState(workflowName, workflowId)

        return

    def updateClosedState(self, workflowName, workflowId):
        """
        _updateClosedState_

        Mark a workflow as Closed

        """
        markCloseoutWorkflowMonitoringDAO = self.daoFactory(classname = "Tier0Feeder.MarkCloseoutWorkflowMonitoring")

        response = self.localSummaryCouchDB.updateRequestStatus(workflowName, 'Closed')

        if response == "OK" or "EXISTS":
            logging.debug("Successfully closed workflow %s" % workflowName)
            markCloseoutWorkflowMonitoringDAO.execute(workflowId)

        return

    def notifyStorageManager(self):
        """
        _notifyStorageManager_

        Find all finished streamers for closed all run/stream
        Send the notification message to StorageManager
        Update the streamer status to finished (deleted = 1)

        """
        getFinishedStreamersDAO = self.daoFactory(classname = "SMNotification.GetFinishedStreamers")
        markStreamersFinishedDAO = self.daoFactory(classname = "SMNotification.MarkStreamersFinished")

        allFinishedStreamers = getFinishedStreamersDAO.execute(transaction = False)

        # split the finished streamers into 'num' interleaved batches of roughly 50
        num = len(allFinishedStreamers) // 50
        for finishedStreamers in [allFinishedStreamers[i::num] for i in range(num)]:

            streamers = []
            filenameParams = ""

            for (id, lfn) in finishedStreamers:
                streamers.append(id)
                filenameParams += "-FILENAME %s " % os.path.basename(lfn)

            logging.debug("Notifying transfer system about processed streamers")
            p = subprocess.Popen("/bin/bash",stdin=subprocess.PIPE,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
            output, error = p.communicate("""
            export T0_BASE_DIR=%s
            export T0ROOT=${T0_BASE_DIR}/T0
            export CONFIG=${T0_BASE_DIR}/Config/TransferSystem_CERN.cfg
 
            export PERL5LIB=${T0ROOT}/perl_lib
 
            ${T0ROOT}/operations/sendRepackedStatus.pl --config $CONFIG %s
            """ % (self.transferSystemBaseDir, filenameParams))

            if len(error) > 0:
                logging.error("ERROR: Could not notify transfer system about processed streamers")
                logging.error("ERROR: %s" % error)

            markStreamersFinishedDAO.execute(streamers, transaction = False)

        return

    def terminate(self, params):
        """
        _terminate_

        Kill the code after one final pass when called by the master thread.

        """
        logging.debug("terminating immediately")
Example #14
class CleanCouchPoller(BaseWorkerThread):
    """
    Cleans up the local couch db according to the given conditions:
    1. Cleans the local couch db when a request is completed and reported to the central db.
       This will clean up the local couchdb, local summary db, and local queue.

    2. Cleans old couch documents created earlier than the time threshold.
    
    """
    def __init__(self, config):
        """
        Initialize config
        """
        BaseWorkerThread.__init__(self)
        # set the workqueue service for REST call
        self.config = config

    def setup(self, parameters):
        """
        Called at startup
        """
        # set the connection for local couchDB call
        self.useReqMgrForCompletionCheck = getattr(
            self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
        self.wmstatsCouchDB = WMStatsWriter(
            self.config.TaskArchiver.localWMStatsURL)
        self.centralCouchDBReader = WMStatsReader(
            self.config.TaskArchiver.centralWMStatsURL)

        if self.useReqMgrForCompletionCheck:
            self.deletableStates = ["announced"]
            self.centralCouchDBWriter = WMStatsWriter(
                self.config.TaskArchiver.centralWMStatsURL)
        else:
            # Tier0 case
            self.deletableStates = ["completed"]
            self.centralCouchDBWriter = self.wmstatsCouchDB

        jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
        jobDBName = self.config.JobStateMachine.couchDBName
        self.jobCouchdb = CouchServer(jobDBurl)
        self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" %
                                                            jobDBName)
        self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" %
                                                            jobDBName)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Cleaning up the old request docs")
            report = self.wmstatsCouchDB.deleteOldDocs(
                self.config.TaskArchiver.DataKeepDays)
            logging.info("%s docs deleted" % report)
            logging.info("getting complete and announced requests")

            deletableWorkflows = self.centralCouchDBReader.workflowsByStatus(
                self.deletableStates)

            logging.info("Ready to delete %s" % deletableWorkflows)
            for workflowName in deletableWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "normal-archived")
                    logging.info("status updated to normal-archived %s" %
                                 workflowName)

            abortedWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["aborted-completed"])
            logging.info("Ready to delete aborted %s" % abortedWorkflows)
            for workflowName in abortedWorkflows:
                if self.cleanAllLocalCouchDB(workflowName):
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "aborted-archived")
                    logging.info("status updated to aborted-archived %s" %
                                 workflowName)

            # TODO: the following code is temporary - remove after production archived data is cleaned
            removableWorkflows = self.centralCouchDBReader.workflowsByStatus(
                ["archived"])

            logging.info("Ready to delete %s from wmagent_summary" %
                         removableWorkflows)
            for workflowName in removableWorkflows:
                logging.info("Deleting %s from WMAgent Summary Couch" %
                             workflowName)
                report = self.deleteWorkflowFromJobCouch(
                    workflowName, "WMStats")
                logging.info("%s docs deleted from wmagent_summary" % report)
                # only update the status when the delete is successful
                # TODO: need to handle the case when there are multiple agents running the same request.
                if report["status"] == "ok":
                    self.centralCouchDBWriter.updateRequestStatus(
                        workflowName, "normal-archived")
                    logging.info(
                        "status updated to normal-archived from archived (this is temp solution for production) %s"
                        % workflowName)

        except Exception as ex:
            logging.error(str(ex))
            logging.error("Error occurred, will try again next cycle")
        print("AnalyticsDataCollector.centralWMStatsURL is not specified")
        sys.exit(1)
        
    parser = OptionParser()
    parser.set_usage("wmstats-request-status-chagne [agent_url:port]")
    
    parser.add_option("-r", "--request", dest = "request",
                      help = "resquest name")
    
    parser.add_option("-s", "--status", dest = "newstatus",
                      help = "set to new status")
    
    (options, args) = parser.parse_args()
    
    if not options.request:
        print("request name needs to be set")
        sys.exit(1)
    
    if not options.newstatus:
        print("new status needs to be set")
        sys.exit(1)
    
    answer = raw_input("%s change to %s in wmstats db (yes, no)?" % (options.request, options.newstatus))
    if not answer.lower() == "yes":
        print("Canceled")
        sys.exit(1)
    
    report = wmstats.updateRequestStatus(options.request, options.newstatus)
    
    print(report)
Example #16
class Tier0PluginTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.wmstatsCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.wmstatsCouchDB, 'WMStats')
        self.testDir = self.testInit.generateWorkDir()

        self.wmstatsWriter = WMStatsWriter(os.environ['COUCHURL'],
                                           self.wmstatsCouchDB)

        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = [
            'RepackMergewrite_QuadElectron_RAW',
            'RepackMergewrite_TriPhoton_RAW',
            'RepackMergewrite_SingleNeutrino_RAW'
        ]

        self.stateMap = {'Merge': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id': workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' %
                                     (workflowName, task),
                                     **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(
            task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
            **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with an express-like workflow;
        every subscription must start out unfinished
        """

        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = [
            'ExpressMergewrite_StreamZFast_DQM',
            'ExpressMergewrite_ExpressPhysics_FEVT',
            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO'
        ]
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMDQMHarvestMerged'

        self.stateMap = {'Merge': [], 'Harvesting': [], 'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id': workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)
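        # The fileset is closed and shared by every subscription in this test.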

        options = {
            'spec': specPath,
            'owner': 'ItsAMeMario',
            'name': workflowName,
            'wfType': 'tier0'
        }
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [t for t in secondLevelTasks
                     if 'CleanupUnmerged' not in t]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' %
                                           (workflowName, task),
                                           **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)

        for (parent, child) in [
            ('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
            ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)
        ]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' %
                                          (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow;
        every subscription must start out unfinished
        """

        # Populate disk and WMBS
        testArguments = getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        workload = promptrecoWorkload(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload,
                                'Reco',
                                'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                      wmbsHelper.topLevelFileset)
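
        # WMBSHelper builds the full PromptReco task tree in WMBS from the
        # real spec, which is why the workflows and filesets below are
        # load()ed rather than create()d.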

        self.stateMap = {
            'AlcaSkim': [],
            'Merge': [],
            'Harvesting': [],
            'Processing Done': []
        }
        self.orderedStates = [
            'AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'
        ]

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id': workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = [
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
            '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM',
            '%s/RecoMergewrite_RECO'
        ]
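        # The '%s' placeholders above are filled in with topLevelTask in the
        # loop below.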
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='%s/unmerged-write_ALCARECO' %
                                  topLevelTask)
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
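                # Strip the 13-character 'AlcaSkimMerge' prefix to recover
                # the ALCARECO stream name.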
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%s' %
                                          (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
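                # For Reco merge tasks the data tier (e.g. AOD, DQM, RECO) is
                # the suffix after the last underscore.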
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s' %
                                          (topLevelTask, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='%s/RecoMergewrite_DQM/merged-Merged' %
                                    topLevelTask)
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self,
                               transitionMethod='markFinished',
                               transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method that walks the states in self.orderedStates and, at
        each step, finishes the tasks needed to trigger a state transition,
        using the given transition method and trigger value.
        It verifies that the request document in WMStats moves through the
        expected transitions.
        """

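        # Each state gets two passes through the loop below: on the even pass
        # every trigger object except the last is finished and the workflow
        # must stay in currentState; on the odd pass the last trigger is
        # finished and the workflow must move to nextState. Hence idx // 2
        # selects the state and idx % 2 selects the pass.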
        for idx in range(len(self.orderedStates) * 2):
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.wmstatsWriter, self.wmstatsWriter)
                currentStateWorkflows = self.wmstatsWriter.workflowsByStatus(
                    [currentState], stale=False)
                nextStateWorkflows = self.wmstatsWriter.workflowsByStatus(
                    [nextState], stale=False)
                self.assertEqual(
                    len(currentStateWorkflows), 1,
                    'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 0,
                    'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.wmstatsWriter, self.wmstatsWriter)
                currentStateWorkflows = self.wmstatsWriter.workflowsByStatus(
                    [currentState], stale=False)
                nextStateWorkflows = self.wmstatsWriter.workflowsByStatus(
                    [nextState], stale=False)
                self.assertEqual(
                    len(currentStateWorkflows), 0,
                    'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(
                    len(nextStateWorkflows), 1,
                    'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Set up an environment with a Repack workflow
        and traverse the different states,
        checking that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Set up an environment with an Express workflow
        and traverse the different states,
        checking that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Set up an environment with a PromptReco workflow
        and traverse the different states,
        checking that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
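
# A minimal sketch for running these tests standalone, assuming the WMCore
# test environment (e.g. the COUCHURL variable) is configured:
if __name__ == '__main__':
    unittest.main()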
Example #17
0
def updateRequestStatus(couchURL, requestList, status):
    """Update every request in requestList to the given status in WMStats."""
    ww = WMStatsWriter(couchURL)
    for request in requestList:
        ww.updateRequestStatus(request, status)
        print("%s is updated to %s" % (request, status))