예제 #1
0
    def testParseT0ProcVer(self):
        """
        _testParseT0ProcVer_

        Verify that parseT0ProcVer copes with the different processing
        version formats sent by the T0, and that it raises on a
        malformed one.
        """
        justNumber = '1'
        withV = 'v1'
        withString = 'PromptSkim-v1'
        malformed = 'ProcVer-v1-Wrong'

        # Each entry is (positional arguments, expected parser output).
        # The cases are evaluated in the order listed.
        cases = [
            ((justNumber,), {'ProcVer': 1, 'ProcString': None}),
            ((withV,), {'ProcVer': 1, 'ProcString': None}),
            ((withV, 'PromptSkim'), {'ProcVer': 1, 'ProcString': 'PromptSkim'}),
            ((withString,), {'ProcVer': 1, 'ProcString': 'PromptSkim'}),
            # The string embedded in the version is expected to be kept
            # even though a different second argument is supplied.
            ((withString, 'Minor'), {'ProcVer': 1, 'ProcString': 'PromptSkim'}),
            ((justNumber, 'TestString'), {'ProcVer': 1, 'ProcString': 'TestString'}),
        ]
        for args, expected in cases:
            self.assertEqual(parseT0ProcVer(*args), expected)

        self.assertRaises(Exception, parseT0ProcVer, malformed)
예제 #2
0
    def testParseT0ProcVer(self):
        """
        _testParseT0ProcVer_

        Exercise parseT0ProcVer with each processing version format the
        T0 may send and check the parsed version/string pair; a
        malformed version must raise.
        """
        def check(expectedVer, expectedString, *parserArgs):
            # Compare the parser output against the expected dictionary.
            parsed = parseT0ProcVer(*parserArgs)
            self.assertEqual(parsed, {'ProcVer': expectedVer,
                                      'ProcString': expectedString})

        bareNumber = '1'
        prefixedV = 'v1'
        stringAndV = 'PromptSkim-v1'
        badFormat = 'ProcVer-v1-Wrong'

        check(1, None, bareNumber)
        check(1, None, prefixedV)
        check(1, 'PromptSkim', prefixedV, 'PromptSkim')
        check(1, 'PromptSkim', stringAndV)
        # The embedded string is expected to be kept over the second argument.
        check(1, 'PromptSkim', stringAndV, 'Minor')
        check(1, 'TestString', bareNumber, 'TestString')

        self.assertRaises(Exception, parseT0ProcVer, badFormat)
예제 #3
0
    def buildWorkload(self):
        """
        _buildWorkload_

        Build the workload given all of the input parameters.

        The input dataset path is split into primary dataset, processed
        dataset and data tier.  A "Reco" task is created; every reco
        output module whose data tier is not ALCARECO gets a merge task,
        while an ALCARECO output feeds an "AlcaSkim" task (plus a cleanup
        task for the reco output and one merge task per alca output).
        For every configured prompt skim, a skim task is attached to the
        merge task of the matching data tier and a merge task is added
        for each skim output module.  LogCollect tasks are added when
        self.doLogCollect is set.

        Side effects: self.processingString, self.processingVersion and
        self.skimJobSplitArgs['include_parents'] are overwritten for each
        prompt skim.

        Returns the workload object.

        Raises Exception if a prompt skim asks for a data tier that has
        no reco merge task, or if the skim configuration cannot be found
        in the ConfigCache after injection.
        """
        (self.inputPrimaryDataset, self.inputProcessedDataset,
         self.inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()
        workload.setDashboardActivity("tier0")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())
        workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                         self.procJobSplitArgs)

        cmsswStepType = "CMSSW"
        taskType = "MultiProcessing" if self.multicore else "Processing"

        recoOutputs = [{'dataTier': dataTier,
                        'eventContent': dataTier,
                        'moduleLabel': "write_%s" % dataTier}
                       for dataTier in self.writeTiers]

        recoTask = workload.newTask("Reco")
        recoOutMods = self.setupProcessingTask(recoTask,
                                               taskType,
                                               self.inputDataset,
                                               scenarioName=self.procScenario,
                                               scenarioFunc="promptReco",
                                               scenarioArgs={
                                                   'globalTag': self.globalTag,
                                                   'skims': self.alcaSkims,
                                                   'dqmSeq': self.dqmSequences,
                                                   'outputs': recoOutputs
                                               },
                                               splitAlgo=self.procJobSplitAlgo,
                                               splitArgs=self.procJobSplitArgs,
                                               stepType=cmsswStepType,
                                               forceUnmerged=True)
        if self.doLogCollect:
            self.addLogCollectTask(recoTask)

        # Merge tasks keyed by data tier; the prompt skims hang off these.
        recoMergeTasks = {}
        for recoOutLabel, recoOutInfo in recoOutMods.items():
            if recoOutInfo['dataTier'] != "ALCARECO":
                mergeTask = self.addMergeTask(recoTask,
                                              self.procJobSplitAlgo,
                                              recoOutLabel,
                                              doLogCollect=self.doLogCollect)
                recoMergeTasks[recoOutInfo['dataTier']] = mergeTask

            else:
                # ALCARECO output is not merged directly: it is the input
                # of an AlcaSkim task whose outputs are merged instead.
                alcaTask = recoTask.addTask("AlcaSkim")
                alcaOutMods = self.setupProcessingTask(
                    alcaTask,
                    taskType,
                    inputStep=recoTask.getStep("cmsRun1"),
                    inputModule=recoOutLabel,
                    scenarioName=self.procScenario,
                    scenarioFunc="alcaSkim",
                    scenarioArgs={
                        'globalTag': self.globalTag,
                        'skims': self.alcaSkims,
                        'primaryDataset': self.inputPrimaryDataset
                    },
                    splitAlgo="WMBSMergeBySize",
                    splitArgs={
                        "max_merge_size": self.maxMergeSize,
                        "min_merge_size": self.minMergeSize,
                        "max_merge_events": self.maxMergeEvents
                    },
                    stepType=cmsswStepType)
                if self.doLogCollect:
                    self.addLogCollectTask(alcaTask,
                                           taskName="AlcaSkimLogCollect")
                self.addCleanupTask(recoTask, recoOutLabel)

                for alcaOutLabel in alcaOutMods:
                    self.addMergeTask(alcaTask,
                                      self.procJobSplitAlgo,
                                      alcaOutLabel,
                                      doLogCollect=self.doLogCollect)

        for promptSkim in self.promptSkims:
            if promptSkim.DataTier not in recoMergeTasks:
                error = ('PromptReco output does not have the following output data tier: %s. '
                         % promptSkim.DataTier)
                error += ('Please change the skim input to be one of the following: %s. '
                          % list(recoMergeTasks))
                error += 'That should be in the relevant skimConfig in T0AST'
                logging.error(error)
                raise Exception(error)

            mergeTask = recoMergeTasks[promptSkim.DataTier]
            skimTask = mergeTask.addTask(promptSkim.SkimName)
            parentCmsswStep = mergeTask.getStep('cmsRun1')

            parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                           'PromptSkim')
            self.processingString = parsedProcVer["ProcString"]
            self.processingVersion = parsedProcVer["ProcVer"]

            self.skimJobSplitArgs['include_parents'] = bool(promptSkim.TwoFileRead)

            configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
            configCacheUrl = self.configCacheUrl or self.couchURL
            injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                                  self.initCommand, promptSkim.ConfigURL,
                                  configLabel, configCacheUrl,
                                  self.couchDBName, self.envPath, self.binPath)
            try:
                configCache = ConfigCache(configCacheUrl, self.couchDBName)
                configCacheID = configCache.getIDFromLabel(configLabel)
                # A missing ID means the injection above did not leave a
                # document behind; fail here rather than building a skim
                # task without a configuration.
                if not configCacheID:
                    error = "The configuration was not uploaded to couch"
                    logging.error(error)
                    raise Exception(error)
            except Exception:
                logging.error(
                    "There was an exception loading the config out of the")
                logging.error(
                    "ConfigCache.  Check the scramOutput.log file in the")
                logging.error(
                    "PromptSkimScheduler directory to find out what went")
                logging.error("wrong.")
                raise

            outputMods = self.setupProcessingTask(
                skimTask,
                "Skim",
                inputStep=parentCmsswStep,
                inputModule="Merged",
                couchURL=self.couchURL,
                couchDBName=self.couchDBName,
                configCacheUrl=self.configCacheUrl,
                configDoc=configCacheID,
                splitAlgo=self.skimJobSplitAlgo,
                splitArgs=self.skimJobSplitArgs)
            if self.doLogCollect:
                self.addLogCollectTask(skimTask,
                                       taskName="%sLogCollect" %
                                       promptSkim.SkimName)

            for outputModuleName in outputMods.keys():
                self.addMergeTask(skimTask,
                                  self.skimJobSplitAlgo,
                                  outputModuleName,
                                  doLogCollect=self.doLogCollect)

        return workload
예제 #4
0
파일: PromptReco.py 프로젝트: cinquo/WMCore
    def buildWorkload(self):
        """
        _buildWorkload_

        Build the workload given all of the input parameters.

        Steps performed:
          - split self.inputDataset into primary dataset, processed
            dataset and data tier;
          - create the "Reco" task, with a merge task for every reco
            output whose data tier is not ALCARECO;
          - for an ALCARECO output, create an "AlcaSkim" task, a cleanup
            task for the reco output and a merge task per alca output;
          - for every prompt skim, create a skim task under the merge
            task of the matching data tier, inject its configuration
            into the ConfigCache and add a merge task per skim output.

        LogCollect tasks are added when self.doLogCollect is set.

        Side effects: self.processingString, self.processingVersion and
        self.skimJobSplitArgs['include_parents'] are overwritten for each
        prompt skim.

        Returns the workload object.

        Raises Exception if a prompt skim asks for a data tier that has
        no reco merge task, or if the skim configuration cannot be found
        in the ConfigCache after injection.
        """
        (self.inputPrimaryDataset, self.inputProcessedDataset,
         self.inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()
        workload.setDashboardActivity("tier0")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())
        workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                         self.procJobSplitArgs)

        cmsswStepType = "CMSSW"
        taskType = "MultiProcessing" if self.multicore else "Processing"

        recoOutputs = [{'dataTier': dataTier,
                        'eventContent': dataTier,
                        'moduleLabel': "write_%s" % dataTier}
                       for dataTier in self.writeTiers]

        recoTask = workload.newTask("Reco")
        recoOutMods = self.setupProcessingTask(recoTask, taskType, self.inputDataset,
                                               scenarioName=self.procScenario,
                                               scenarioFunc="promptReco",
                                               scenarioArgs={'globalTag': self.globalTag,
                                                             'skims': self.alcaSkims,
                                                             'dqmSeq': self.dqmSequences,
                                                             'outputs': recoOutputs},
                                               splitAlgo=self.procJobSplitAlgo,
                                               splitArgs=self.procJobSplitArgs,
                                               stepType=cmsswStepType,
                                               forceUnmerged=True)
        if self.doLogCollect:
            self.addLogCollectTask(recoTask)

        # Merge tasks keyed by data tier; the prompt skims hang off these.
        recoMergeTasks = {}
        for recoOutLabel, recoOutInfo in recoOutMods.items():
            if recoOutInfo['dataTier'] != "ALCARECO":
                mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo, recoOutLabel,
                                              doLogCollect=self.doLogCollect)
                recoMergeTasks[recoOutInfo['dataTier']] = mergeTask

            else:
                # ALCARECO output is not merged directly: it is the input
                # of an AlcaSkim task whose outputs are merged instead.
                alcaTask = recoTask.addTask("AlcaSkim")
                alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                       inputStep=recoTask.getStep("cmsRun1"),
                                                       inputModule=recoOutLabel,
                                                       scenarioName=self.procScenario,
                                                       scenarioFunc="alcaSkim",
                                                       scenarioArgs={'globalTag': self.globalTag,
                                                                     'skims': self.alcaSkims,
                                                                     'primaryDataset': self.inputPrimaryDataset},
                                                       splitAlgo="WMBSMergeBySize",
                                                       splitArgs={"max_merge_size": self.maxMergeSize,
                                                                  "min_merge_size": self.minMergeSize,
                                                                  "max_merge_events": self.maxMergeEvents},
                                                       stepType=cmsswStepType)
                if self.doLogCollect:
                    self.addLogCollectTask(alcaTask, taskName="AlcaSkimLogCollect")
                self.addCleanupTask(recoTask, recoOutLabel)

                for alcaOutLabel in alcaOutMods:
                    self.addMergeTask(alcaTask, self.procJobSplitAlgo, alcaOutLabel,
                                      doLogCollect=self.doLogCollect)

        for promptSkim in self.promptSkims:
            if promptSkim.DataTier not in recoMergeTasks:
                error = ('PromptReco output does not have the following output data tier: %s. '
                         % promptSkim.DataTier)
                error += ('Please change the skim input to be one of the following: %s. '
                          % list(recoMergeTasks))
                error += 'That should be in the relevant skimConfig in T0AST'
                logging.error(error)
                raise Exception(error)

            mergeTask = recoMergeTasks[promptSkim.DataTier]
            skimTask = mergeTask.addTask(promptSkim.SkimName)
            parentCmsswStep = mergeTask.getStep('cmsRun1')

            parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                           'PromptSkim')
            self.processingString = parsedProcVer["ProcString"]
            self.processingVersion = parsedProcVer["ProcVer"]

            self.skimJobSplitArgs['include_parents'] = bool(promptSkim.TwoFileRead)

            configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
            configCacheUrl = self.configCacheUrl or self.couchURL
            injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                                  self.initCommand, promptSkim.ConfigURL, configLabel,
                                  configCacheUrl, self.couchDBName,
                                  self.envPath, self.binPath)
            try:
                configCache = ConfigCache(configCacheUrl, self.couchDBName)
                configCacheID = configCache.getIDFromLabel(configLabel)
                # A missing ID means the injection above did not leave a
                # document behind; fail here rather than building a skim
                # task without a configuration.
                if not configCacheID:
                    error = "The configuration was not uploaded to couch"
                    logging.error(error)
                    raise Exception(error)
            except Exception:
                logging.error("There was an exception loading the config out of the")
                logging.error("ConfigCache.  Check the scramOutput.log file in the")
                logging.error("PromptSkimScheduler directory to find out what went")
                logging.error("wrong.")
                raise

            outputMods = self.setupProcessingTask(skimTask, "Skim",
                                                  inputStep=parentCmsswStep,
                                                  inputModule="Merged",
                                                  couchURL=self.couchURL,
                                                  couchDBName=self.couchDBName,
                                                  configCacheUrl=self.configCacheUrl,
                                                  configDoc=configCacheID,
                                                  splitAlgo=self.skimJobSplitAlgo,
                                                  splitArgs=self.skimJobSplitArgs)
            if self.doLogCollect:
                self.addLogCollectTask(skimTask, taskName="%sLogCollect" % promptSkim.SkimName)

            for outputModuleName in outputMods.keys():
                self.addMergeTask(skimTask, self.skimJobSplitAlgo, outputModuleName,
                                  doLogCollect=self.doLogCollect)

        return workload