def _configCacheId(self, label):
    """
    Return the config cache document id for the given config label.

    If a config with this label is already present in the
    'reqmgr_config_cache' couch database its id is returned directly.
    Otherwise the matching test config file is uploaded (including its
    PSet tweaks) and the newly created document id is returned.
    """
    key, cert = self.__class__.reqmgr['requests'].getKeyCert()
    configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                              'reqmgr_config_cache', ckey=key, cert=cert)
    try:
        configCacheId = configCache.getIDFromLabel(label)
    except Exception:
        # Narrowed from a bare `except:`; a lookup failure just means the
        # config has not been cached yet.
        configCacheId = None
    if configCacheId:
        return configCacheId
    # The following will fail if FWCore.ParameterSet not in PYTHONPATH
    from PSetTweaks.WMTweak import makeTweak
    configCache.createUserGroup('test', 'test')
    configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data', 'configs')
    configCache.addConfig(os.path.join(configDir, label + '.py'))
    configCache.setLabel(label)
    configCache.setDescription(label)
    modPath = imp.find_module(label, [configDir])
    try:
        loadedConfig = imp.load_module(label, modPath[0], modPath[1], modPath[2])
    finally:
        # imp.find_module returns an open file object; the caller is
        # responsible for closing it (None for packages).
        if modPath[0] is not None:
            modPath[0].close()
    configCache.setPSetTweaks(
        makeTweak(loadedConfig.process).jsondictionary())
    configCache.save()
    return configCache.getIDFromLabel(label)
def _configCacheId(self, label):
    """
    Return the config cache id for the given config label.

    Looks the label up in the 'reqmgr_config_cache' couch database first;
    if absent, uploads the corresponding test config (with PSet tweaks)
    and returns the id of the freshly saved document.
    """
    key, cert = self.__class__.reqmgr['requests'].getKeyCert()
    configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                              'reqmgr_config_cache', ckey = key, cert = cert)
    try:
        configCacheId = configCache.getIDFromLabel(label)
    except Exception:
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # are not swallowed. Failure here means "not cached yet".
        configCacheId = None
    if configCacheId:
        return configCacheId
    # The following will fail if FWCore.ParameterSet not in PYTHONPATH
    from PSetTweaks.WMTweak import makeTweak
    configCache.createUserGroup('test', 'test')
    configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data', 'configs')
    configCache.addConfig(os.path.join(configDir, label + '.py'))
    configCache.setLabel(label)
    configCache.setDescription(label)
    modPath = imp.find_module(label, [configDir])
    try:
        loadedConfig = imp.load_module(label, modPath[0], modPath[1], modPath[2])
    finally:
        # Close the file handle opened by imp.find_module (may be None).
        if modPath[0] is not None:
            modPath[0].close()
    configCache.setPSetTweaks(makeTweak(loadedConfig.process).jsondictionary())
    configCache.save()
    return configCache.getIDFromLabel(label)
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters.

    Uploads the skim config into the couch config cache, resolves its
    document id, then delegates workload construction to
    DataProcessingWorkloadFactory and applies site/block whitelists.
    """
    # Prefer the dedicated config cache URL; fall back to the main couch URL.
    configCouchUrl = arguments.get("ConfigCacheUrl", None) or arguments["CouchURL"]
    injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                          arguments["InitCommand"], arguments["SkimConfig"],
                          workloadName, configCouchUrl,
                          arguments["CouchDBName"],
                          arguments.get("EnvPath", None),
                          arguments.get("BinPath", None))
    try:
        configCache = ConfigCache(configCouchUrl, arguments["CouchDBName"])
        arguments["ConfigCacheID"] = configCache.getIDFromLabel(workloadName)
        if not arguments["ConfigCacheID"]:
            logging.error("The configuration was not uploaded to couch")
            # Raise an instance with a message instead of the bare class.
            raise Exception("The configuration was not uploaded to couch")
    except Exception:
        logging.error("There was an exception loading the config out of the")
        logging.error("ConfigCache. Check the scramOutput.log file in the")
        logging.error("PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise
    parsedProcVer = parseT0ProcVer(arguments["ProcessingVersion"],
                                   'PromptSkim')
    arguments["ProcessingString"] = parsedProcVer["ProcString"]
    arguments["ProcessingVersion"] = parsedProcVer["ProcVer"]
    workload = DataProcessingWorkloadFactory.__call__(self, workloadName,
                                                      arguments)
    # We need to strip off "MSS" as that causes all sorts of problems.
    custodialSite = arguments["CustodialSite"]
    if "MSS" in custodialSite:
        site = custodialSite[:-4]
    else:
        site = custodialSite
    workload.setSiteWhitelist(site)
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters.
    """
    couchUrl = arguments["CouchURL"]
    couchDb = arguments["CouchDBName"]
    # Push the skim configuration into the couch config cache first.
    self.injectIntoConfigCache(arguments["CMSSWVersion"],
                               arguments["ScramArch"],
                               arguments["InitCommand"],
                               arguments["SkimConfig"],
                               workloadName, couchUrl, couchDb)
    # Record the id of the freshly uploaded config document.
    arguments["ProcConfigCacheID"] = \
        ConfigCache(couchUrl, couchDb).getIDFromLabel(workloadName)
    workload = DataProcessingWorkloadFactory.__call__(self, workloadName,
                                                      arguments)
    workload.setSiteWhitelist(arguments["CustodialSite"])
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters.

    Uploads the skim config into the couch config cache and records its
    document id in the request arguments.
    """
    self.injectIntoConfigCache(arguments["CMSSWVersion"],
                               arguments["ScramArch"],
                               arguments["InitCommand"],
                               arguments["SkimConfig"],
                               workloadName, arguments["CouchURL"],
                               arguments["CouchDBName"])
    try:
        configCache = ConfigCache(arguments["CouchURL"],
                                  arguments["CouchDBName"])
        arguments["ProcConfigCacheID"] = configCache.getIDFromLabel(workloadName)
    except Exception:
        # Was `except Exception, ex` (Python-2-only syntax) with `ex`
        # never used; this form works on both Python 2 and 3.
        logging.error("There was an exception loading the config out of the")
        logging.error("ConfigCache. Check the scramOutput.log file in the")
        logging.error("PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise
    # NOTE(review): the visible code ends here without constructing or
    # returning a workload — confirm the remainder of this method exists
    # elsewhere in the original file.
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters.
    """
    couchDb = arguments["CouchDBName"]
    # A dedicated config cache URL wins over the main couch URL.
    configCouchUrl = arguments.get("ConfigCacheUrl", None) or arguments["CouchURL"]
    injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                          arguments["InitCommand"], arguments["SkimConfig"],
                          workloadName, configCouchUrl, couchDb,
                          arguments.get("EnvPath", None),
                          arguments.get("BinPath", None))
    try:
        cacheId = ConfigCache(configCouchUrl, couchDb).getIDFromLabel(workloadName)
        arguments["ConfigCacheID"] = cacheId
        if not cacheId:
            logging.error("The configuration was not uploaded to couch")
            raise Exception
    except Exception:
        logging.error("There was an exception loading the config out of the")
        logging.error("ConfigCache. Check the scramOutput.log file in the")
        logging.error("PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise

    procVer = parseT0ProcVer(arguments["ProcessingVersion"], 'PromptSkim')
    arguments["ProcessingString"] = procVer["ProcString"]
    arguments["ProcessingVersion"] = procVer["ProcVer"]

    workload = DataProcessingWorkloadFactory.__call__(self, workloadName,
                                                      arguments)

    # We need to strip off "MSS" as that causes all sorts of problems.
    custodial = arguments["CustodialSite"]
    site = custodial[:-4] if "MSS" in custodial else custodial
    workload.setSiteWhitelist(site)
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.  Note that there
    will be LogCollect tasks created for each processing task and Cleanup
    tasks created for each merge task.
    """
    # Input dataset path has the form "/primary/processed/tier".
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "Processing"
    if self.multicore:
        taskType = "MultiProcessing"

    # One output module per requested data tier.
    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append({'dataTier': dataTier,
                            'eventContent': dataTier,
                            'moduleLabel': "write_%s" % dataTier})

    recoTask = workload.newTask("Reco")
    recoOutMods = self.setupProcessingTask(recoTask, taskType,
                                           self.inputDataset,
                                           scenarioName=self.procScenario,
                                           scenarioFunc="promptReco",
                                           scenarioArgs={
                                               'globalTag': self.globalTag,
                                               'skims': self.alcaSkims,
                                               'dqmSeq': self.dqmSequences,
                                               'outputs': recoOutputs},
                                           splitAlgo=self.procJobSplitAlgo,
                                           splitArgs=self.procJobSplitArgs,
                                           stepType=cmsswStepType,
                                           forceUnmerged=True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel,
                                          doLogCollect=self.doLogCollect)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            # ALCARECO output feeds a dedicated AlcaSkim task rather than
            # a plain merge task.
            alcaTask = recoTask.addTask("AlcaSkim")
            alcaOutMods = self.setupProcessingTask(
                alcaTask, taskType,
                inputStep=recoTask.getStep("cmsRun1"),
                inputModule=recoOutLabel,
                scenarioName=self.procScenario,
                scenarioFunc="alcaSkim",
                scenarioArgs={'globalTag': self.globalTag,
                              'skims': self.alcaSkims,
                              'primaryDataset': self.inputPrimaryDataset},
                splitAlgo="WMBSMergeBySize",
                splitArgs={"max_merge_size": self.maxMergeSize,
                           "min_merge_size": self.minMergeSize,
                           "max_merge_events": self.maxMergeEvents},
                stepType=cmsswStepType)
            if self.doLogCollect:
                self.addLogCollectTask(alcaTask, taskName="AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                self.addMergeTask(alcaTask, self.procJobSplitAlgo,
                                  alcaOutLabel,
                                  doLogCollect=self.doLogCollect)

    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            # Spaces added between sentences; the old message ran them together.
            error = 'PromptReco output does not have the following output data tier: %s. ' % promptSkim.DataTier
            error += 'Please change the skim input to be one of the following: %s. ' % recoMergeTasks.keys()
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception(error)

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        # Two-file-read skims need the parent files staged alongside.
        self.skimJobSplitArgs['include_parents'] = bool(promptSkim.TwoFileRead)

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)
        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # BUGFIX: the condition was inverted (`if configCacheID:`),
            # raising exactly when the upload succeeded.
            if not configCacheID:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(
            skimTask, "Skim",
            inputStep=parentCmsswStep,
            inputModule="Merged",
            couchURL=self.couchURL,
            couchDBName=self.couchDBName,
            configCacheUrl=self.configCacheUrl,
            configDoc=configCacheID,
            splitAlgo=self.skimJobSplitAlgo,
            splitArgs=self.skimJobSplitArgs)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName="%sLogCollect" % promptSkim.SkimName)
        for outputModuleName in outputMods.keys():
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect=self.doLogCollect)

    return workload
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.  Note that there
    will be LogCollect tasks created for each processing task and Cleanup
    tasks created for each merge task.
    """
    # Input dataset path has the form "/primary/processed/tier".
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "Processing"

    # One output module per requested data tier.
    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append( { 'dataTier' : dataTier,
                              'eventContent' : dataTier,
                              'moduleLabel' : "write_%s" % dataTier } )

    recoTask = workload.newTask("Reco")
    scenarioArgs = { 'globalTag' : self.globalTag,
                     'skims' : self.alcaSkims,
                     'dqmSeq' : self.dqmSequences,
                     'outputs' : recoOutputs }
    if self.globalTagConnect:
        scenarioArgs['globalTagConnect'] = self.globalTagConnect

    recoOutMods = self.setupProcessingTask(recoTask, taskType,
                                           self.inputDataset,
                                           scenarioName = self.procScenario,
                                           scenarioFunc = "promptReco",
                                           scenarioArgs = scenarioArgs,
                                           splitAlgo = self.procJobSplitAlgo,
                                           splitArgs = self.procJobSplitArgs,
                                           stepType = cmsswStepType,
                                           forceUnmerged = True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel,
                                          doLogCollect = self.doLogCollect)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            # ALCARECO output feeds a dedicated AlcaSkim task rather than
            # a plain merge task.
            alcaTask = recoTask.addTask("AlcaSkim")
            scenarioArgs = { 'globalTag' : self.globalTag,
                             'skims' : self.alcaSkims,
                             'primaryDataset' : self.inputPrimaryDataset }
            if self.globalTagConnect:
                scenarioArgs['globalTagConnect'] = self.globalTagConnect
            alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                   inputStep = recoTask.getStep("cmsRun1"),
                                                   inputModule = recoOutLabel,
                                                   scenarioName = self.procScenario,
                                                   scenarioFunc = "alcaSkim",
                                                   scenarioArgs = scenarioArgs,
                                                   splitAlgo = "WMBSMergeBySize",
                                                   splitArgs = {"max_merge_size": self.maxMergeSize,
                                                                "min_merge_size": self.minMergeSize,
                                                                "max_merge_events": self.maxMergeEvents},
                                                   stepType = cmsswStepType,
                                                   useMulticore = False)
            if self.doLogCollect:
                self.addLogCollectTask(alcaTask, taskName = "AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                self.addMergeTask(alcaTask, self.procJobSplitAlgo,
                                  alcaOutLabel,
                                  doLogCollect = self.doLogCollect)

    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            # Spaces added between sentences; the old message ran them together.
            error = 'PromptReco output does not have the following output data tier: %s. ' % promptSkim.DataTier
            error += 'Please change the skim input to be one of the following: %s. ' % recoMergeTasks.keys()
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception(error)

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        # Two-file-read skims need the parent files staged alongside.
        self.skimJobSplitArgs['include_parents'] = bool(promptSkim.TwoFileRead)

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)
        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # BUGFIX: the condition was inverted (`if configCacheID:`),
            # raising exactly when the upload succeeded.
            if not configCacheID:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(skimTask, "Skim",
                                              inputStep = parentCmsswStep,
                                              inputModule = "Merged",
                                              couchURL = self.couchURL,
                                              couchDBName = self.couchDBName,
                                              configCacheUrl = self.configCacheUrl,
                                              configDoc = configCacheID,
                                              splitAlgo = self.skimJobSplitAlgo,
                                              splitArgs = self.skimJobSplitArgs,
                                              useMulticore = False)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName = "%sLogCollect" % promptSkim.SkimName)
        for outputModuleName in outputMods.keys():
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect = self.doLogCollect)

    workload.setBlockCloseSettings(self.blockCloseDelay,
                                   workload.getBlockCloseMaxFiles(),
                                   workload.getBlockCloseMaxEvents(),
                                   workload.getBlockCloseMaxSize())

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    workload.setTaskPropertiesFromWorkload()

    # set the LFN bases (normally done by request manager)
    # also pass runNumber (workload evaluates it)
    workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase,
                        runNumber = self.runNumber)

    return workload
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.  Note that there
    will be LogCollect tasks created for each processing task and Cleanup
    tasks created for each merge task.
    """
    # The input dataset path has the form "/primary/processed/tier".
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "Processing"
    if self.multicore:
        taskType = "MultiProcessing"

    # One output module per requested data tier.
    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append( { 'dataTier' : dataTier,
                              'eventContent' : dataTier,
                              'filterName' : "Tier1PromptReco",
                              'moduleLabel' : "write_%s" % dataTier } )

    recoTask = workload.newTask("Reco")
    recoOutMods = self.setupProcessingTask(recoTask, taskType, self.inputDataset,
                                           scenarioName = self.procScenario,
                                           scenarioFunc = "promptReco",
                                           scenarioArgs = { 'globalTag' : self.globalTag,
                                                            'skims' : self.alcaSkims,
                                                            'outputs' : recoOutputs },
                                           splitAlgo = self.procJobSplitAlgo,
                                           splitArgs = self.procJobSplitArgs,
                                           stepType = cmsswStepType,
                                           forceUnmerged = True)
    self.addLogCollectTask(recoTask)

    # Merge every non-ALCARECO tier; ALCARECO instead feeds an AlcaSkim task.
    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            alcaTask = recoTask.addTask("AlcaSkim")
            alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                   inputStep = recoTask.getStep("cmsRun1"),
                                                   inputModule = recoOutLabel,
                                                   scenarioName = self.procScenario,
                                                   scenarioFunc = "alcaSkim",
                                                   scenarioArgs = { 'globalTag' : self.globalTag,
                                                                    'skims' : self.alcaSkims,
                                                                    'primaryDataset' : self.inputPrimaryDataset },
                                                   splitAlgo = "WMBSMergeBySize",
                                                   splitArgs = {"max_merge_size": self.maxMergeSize,
                                                                "min_merge_size": self.minMergeSize,
                                                                "max_merge_events": self.maxMergeEvents},
                                                   stepType = cmsswStepType)
            self.addLogCollectTask(alcaTask, taskName = "AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                self.addMergeTask(alcaTask, self.procJobSplitAlgo,
                                  alcaOutLabel)

    # Attach one skim task (plus merges) under the merge task of its tier.
    for promptSkim in self.promptSkims:
        if not promptSkim.DataTier in recoMergeTasks:
            error = 'PromptReco output does not have the following output data tier: %s.' % promptSkim.DataTier
            error += 'Please change the skim input to be one of the following: %s' % recoMergeTasks.keys()
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')
        # NOTE(review): this overwrites the workload-level processing version
        # with the per-skim one on every iteration ("Does this work?" in the
        # original) -- confirm this is intended.
        self.processingVersion = promptSkim.ProcessingVersion

        # Two-file-read skims also need the parent files as input.
        if promptSkim.TwoFileRead:
            self.skimJobSplitArgs['include_parents'] = True
        else:
            self.skimJobSplitArgs['include_parents'] = False

        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              self.workloadName, self.couchURL,
                              self.couchDBName)
        try:
            configCache = ConfigCache(self.couchURL, self.couchDBName)
            procConfigCacheID = configCache.getIDFromLabel(self.workloadName)
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(skimTask, "Skim",
                                              inputStep = parentCmsswStep,
                                              inputModule = "Merged",
                                              couchURL = self.couchURL,
                                              couchDBName = self.couchDBName,
                                              configDoc = procConfigCacheID,
                                              splitAlgo = self.skimJobSplitAlgo,
                                              splitArgs = self.skimJobSplitArgs)
        self.addLogCollectTask(skimTask,
                               taskName = "%sLogCollect" % promptSkim.SkimName)
        for outputModuleName in outputMods.keys():
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName)

    return workload