def testParseT0ProcVer(self):
    """
    _testParseT0ProcVer_

    Run parseT0ProcVer over the processing version formats sent by the T0
    (bare number, 'v'-prefixed number, string-prefixed version), with and
    without the optional second argument, and compare the parsed
    dictionaries. A version with an extra trailing field must raise.
    """
    bareNumber = '1'
    vPrefixed = 'v1'
    stringPrefixed = 'PromptSkim-v1'
    malformed = 'ProcVer-v1-Wrong'

    # Each entry: (positional arguments for the parser, expected ProcString).
    # The expected ProcVer is 1 in every case.
    cases = [
        ((bareNumber,), None),
        ((vPrefixed,), None),
        ((vPrefixed, 'PromptSkim'), 'PromptSkim'),
        ((stringPrefixed,), 'PromptSkim'),
        # The string embedded in the version is expected to win over the
        # one passed as second argument.
        ((stringPrefixed, 'Minor'), 'PromptSkim'),
        ((bareNumber, 'TestString'), 'TestString'),
    ]
    for callArgs, expectedString in cases:
        parsed = parseT0ProcVer(*callArgs)
        self.assertEqual(parsed, {'ProcVer': 1, 'ProcString': expectedString})

    self.assertRaises(Exception, parseT0ProcVer, malformed)
def testParseT0ProcVer(self):
    """
    _testParseT0ProcVer_

    Verify that parseT0ProcVer handles the different shapes of processing
    version sent by the T0, and that a malformed version raises.
    """
    def parsedAs(procString):
        # Every valid input in this test carries version number 1.
        return {'ProcVer': 1, 'ProcString': procString}

    numberOnly, withV, withString, broken = ('1', 'v1', 'PromptSkim-v1',
                                             'ProcVer-v1-Wrong')

    # Version only: no processing string is reported.
    self.assertEqual(parseT0ProcVer(numberOnly), parsedAs(None))
    self.assertEqual(parseT0ProcVer(withV), parsedAs(None))

    # Version plus an explicit second argument.
    self.assertEqual(parseT0ProcVer(withV, 'PromptSkim'),
                     parsedAs('PromptSkim'))

    # Processing string embedded in the version; it is expected to take
    # precedence over the second argument.
    self.assertEqual(parseT0ProcVer(withString), parsedAs('PromptSkim'))
    self.assertEqual(parseT0ProcVer(withString, 'Minor'),
                     parsedAs('PromptSkim'))

    self.assertEqual(parseT0ProcVer(numberOnly, 'TestString'),
                     parsedAs('TestString'))

    # Too many dash-separated fields.
    self.assertRaises(Exception, parseT0ProcVer, broken)
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.

    A "Reco" processing task is created on the input dataset. Every reco
    output whose data tier is not ALCARECO gets a merge task; an ALCARECO
    output instead feeds an "AlcaSkim" child task whose outputs are merged.
    Each configured prompt skim is attached under the merge task of the
    data tier it reads, and its outputs are merged as well. LogCollect
    tasks are added when self.doLogCollect is set.

    Returns the workload object created by self.createWorkload().

    Raises Exception when a prompt skim asks for a data tier that has no
    reco merge task, or when the skim configuration cannot be found in the
    ConfigCache after injection.
    """
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "MultiProcessing" if self.multicore else "Processing"

    # One output module description per requested data tier.
    recoOutputs = [{'dataTier': dataTier,
                    'eventContent': dataTier,
                    'moduleLabel': "write_%s" % dataTier}
                   for dataTier in self.writeTiers]

    recoTask = workload.newTask("Reco")
    recoOutMods = self.setupProcessingTask(
        recoTask, taskType, self.inputDataset,
        scenarioName=self.procScenario,
        scenarioFunc="promptReco",
        scenarioArgs={'globalTag': self.globalTag,
                      'skims': self.alcaSkims,
                      'dqmSeq': self.dqmSequences,
                      'outputs': recoOutputs},
        splitAlgo=self.procJobSplitAlgo,
        splitArgs=self.procJobSplitArgs,
        stepType=cmsswStepType,
        forceUnmerged=True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    # Merge tasks keyed by data tier; prompt skims look up their parent here.
    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel,
                                          doLogCollect=self.doLogCollect)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            # ALCARECO output is not merged directly; it is skimmed first.
            alcaTask = recoTask.addTask("AlcaSkim")
            alcaOutMods = self.setupProcessingTask(
                alcaTask, taskType,
                inputStep=recoTask.getStep("cmsRun1"),
                inputModule=recoOutLabel,
                scenarioName=self.procScenario,
                scenarioFunc="alcaSkim",
                scenarioArgs={'globalTag': self.globalTag,
                              'skims': self.alcaSkims,
                              'primaryDataset': self.inputPrimaryDataset},
                splitAlgo="WMBSMergeBySize",
                splitArgs={"max_merge_size": self.maxMergeSize,
                           "min_merge_size": self.minMergeSize,
                           "max_merge_events": self.maxMergeEvents},
                stepType=cmsswStepType)
            if self.doLogCollect:
                self.addLogCollectTask(alcaTask,
                                       taskName="AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel in alcaOutMods:
                self.addMergeTask(alcaTask, self.procJobSplitAlgo,
                                  alcaOutLabel,
                                  doLogCollect=self.doLogCollect)

    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            error = ('PromptReco output does not have the following output '
                     'data tier: %s. ' % promptSkim.DataTier)
            error += ('Please change the skim input to be one of the '
                      'following: %s. ' % sorted(recoMergeTasks))
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception(error)

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        # These attributes are overwritten for every skim before its task
        # is set up.
        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        self.skimJobSplitArgs['include_parents'] = bool(promptSkim.TwoFileRead)

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)
        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # A missing ID means the injection above did not land in couch.
            if not configCacheID:
                logging.error("The configuration was not uploaded to couch")
                raise Exception("The configuration was not uploaded to couch")
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        # NOTE(review): this passes self.configCacheUrl, not the local
        # configCacheUrl that falls back to couchURL -- confirm intended.
        outputMods = self.setupProcessingTask(
            skimTask, "Skim",
            inputStep=parentCmsswStep,
            inputModule="Merged",
            couchURL=self.couchURL,
            couchDBName=self.couchDBName,
            configCacheUrl=self.configCacheUrl,
            configDoc=configCacheID,
            splitAlgo=self.skimJobSplitAlgo,
            splitArgs=self.skimJobSplitArgs)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName="%sLogCollect" % promptSkim.SkimName)

        for outputModuleName in outputMods:
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect=self.doLogCollect)

    return workload
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.

    Note that LogCollect tasks are created for each processing task (when
    self.doLogCollect is set) and that the unmerged ALCARECO reco output
    gets a Cleanup task once the AlcaSkim task has been attached to it.

    Task layout produced here:
      Reco -> merge task per non-ALCARECO output
      Reco -> AlcaSkim -> merge task per AlcaSkim output
      reco merge task -> prompt skim -> merge task per skim output

    Returns the workload created by self.createWorkload().

    Raises Exception if a prompt skim reads a data tier that PromptReco
    does not merge, or if the skim config ID cannot be retrieved from the
    ConfigCache.
    """
    datasetParts = self.inputDataset[1:].split("/")
    (self.inputPrimaryDataset,
     self.inputProcessedDataset,
     self.inputDataTier) = datasetParts

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    if self.multicore:
        taskType = "MultiProcessing"
    else:
        taskType = "Processing"

    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append({'dataTier': dataTier,
                            'eventContent': dataTier,
                            'moduleLabel': "write_%s" % dataTier})

    recoScenarioArgs = {'globalTag': self.globalTag,
                        'skims': self.alcaSkims,
                        'dqmSeq': self.dqmSequences,
                        'outputs': recoOutputs}
    recoTask = workload.newTask("Reco")
    recoOutMods = self.setupProcessingTask(recoTask, taskType,
                                           self.inputDataset,
                                           scenarioName=self.procScenario,
                                           scenarioFunc="promptReco",
                                           scenarioArgs=recoScenarioArgs,
                                           splitAlgo=self.procJobSplitAlgo,
                                           splitArgs=self.procJobSplitArgs,
                                           stepType=cmsswStepType,
                                           forceUnmerged=True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    # data tier -> merge task, used below to find each skim's parent task
    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        outputTier = recoOutInfo['dataTier']
        if outputTier != "ALCARECO":
            recoMergeTasks[outputTier] = self.addMergeTask(
                recoTask, self.procJobSplitAlgo, recoOutLabel,
                doLogCollect=self.doLogCollect)
            continue

        # ALCARECO: run the AlcaSkim step on the unmerged reco output.
        alcaScenarioArgs = {'globalTag': self.globalTag,
                            'skims': self.alcaSkims,
                            'primaryDataset': self.inputPrimaryDataset}
        alcaSplitArgs = {"max_merge_size": self.maxMergeSize,
                         "min_merge_size": self.minMergeSize,
                         "max_merge_events": self.maxMergeEvents}
        alcaTask = recoTask.addTask("AlcaSkim")
        alcaOutMods = self.setupProcessingTask(
            alcaTask, taskType,
            inputStep=recoTask.getStep("cmsRun1"),
            inputModule=recoOutLabel,
            scenarioName=self.procScenario,
            scenarioFunc="alcaSkim",
            scenarioArgs=alcaScenarioArgs,
            splitAlgo="WMBSMergeBySize",
            splitArgs=alcaSplitArgs,
            stepType=cmsswStepType)
        if self.doLogCollect:
            self.addLogCollectTask(alcaTask, taskName="AlcaSkimLogCollect")
        self.addCleanupTask(recoTask, recoOutLabel)
        for alcaOutLabel in alcaOutMods:
            self.addMergeTask(alcaTask, self.procJobSplitAlgo, alcaOutLabel,
                              doLogCollect=self.doLogCollect)

    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            # Sentences are joined with explicit spaces so the logged
            # message stays readable.
            error = " ".join([
                'PromptReco output does not have the following output data tier: %s.'
                % promptSkim.DataTier,
                'Please change the skim input to be one of the following: %s.'
                % sorted(recoMergeTasks),
                'That should be in the relevant skimConfig in T0AST',
            ])
            logging.error(error)
            raise Exception(error)

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        if promptSkim.TwoFileRead:
            self.skimJobSplitArgs['include_parents'] = True
        else:
            self.skimJobSplitArgs['include_parents'] = False

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)

        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # Fail only when no document ID came back for the label; the
            # ID is needed as configDoc for the skim task below.
            if not configCacheID:
                notUploaded = "The configuration was not uploaded to couch"
                logging.error(notUploaded)
                raise Exception(notUploaded)
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(skimTask, "Skim",
                                              inputStep=parentCmsswStep,
                                              inputModule="Merged",
                                              couchURL=self.couchURL,
                                              couchDBName=self.couchDBName,
                                              configCacheUrl=self.configCacheUrl,
                                              configDoc=configCacheID,
                                              splitAlgo=self.skimJobSplitAlgo,
                                              splitArgs=self.skimJobSplitArgs)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName="%sLogCollect" % promptSkim.SkimName)

        for outputModuleName in outputMods:
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect=self.doLogCollect)

    return workload