def testFirstEvent(self):
    """
    _testFirstEvent_

    For a job whose input file is not an MCFakeFile, check the event tweaks
    produced by makeJobTweak:
      * with an untouched Mask neither skipEvents nor firstEvent is set on
        process.source;
      * with FirstEvent = 0 in the mask, skipEvents is set to uint32(0).
    """
    dataJob = Job()
    dataJob["input_files"] = [{"lfn": "bogusFile", "parents": []}]
    dataJob["mask"] = Mask()

    # Stage 1: nothing in the mask, so no event-related tweak is expected.
    untouchedMaskTweak = PSetTweak()
    WMTweaks.makeJobTweak(dataJob, untouchedMaskTweak)
    self.assertFalse(
        hasattr(untouchedMaskTweak.process.source, "skipEvents"),
        msg="Error: There should be no skipEvents tweak.")
    self.assertFalse(
        hasattr(untouchedMaskTweak.process.source, "firstEvent"),
        msg="Error: There should be no firstEvent tweak.")

    # Stage 2: FirstEvent given, so skipEvents must appear with that value.
    dataJob["mask"]["FirstEvent"] = 0
    firstEventTweak = PSetTweak()
    WMTweaks.makeJobTweak(dataJob, firstEventTweak)
    self.assertTrue(
        hasattr(firstEventTweak.process.source, "skipEvents"),
        msg="Error: There should be a skipEvents tweak.")
    self.assertEqual(
        firstEventTweak.process.source.skipEvents,
        'customTypeCms.untracked.uint32(0)',
        msg="Error: The skipEvents tweak should be 0.")
    return
def testFirstRunMC(self):
    """
    _testFirstRunMC_

    For an MC job (MCFakeFile input) check process.source.firstRun:
      * it is uint32(1) when the mask carries no FirstRun;
      * it follows the mask once FirstRun is set (5 here).
    """
    mcJob = Job()
    mcJob["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    mcJob["mask"] = Mask()
    mcJob["mask"]["FirstLumi"] = 200
    mcJob["mask"]["FirstEvent"] = 100
    mcJob["counter"] = 5

    # No FirstRun in the mask yet.
    defaultRunTweak = PSetTweak()
    WMTweaks.makeJobTweak(mcJob, defaultRunTweak)
    self.assertTrue(
        hasattr(defaultRunTweak.process.source, "firstRun"),
        msg="Error: There should be a first run tweak")
    self.assertEqual(
        defaultRunTweak.process.source.firstRun,
        'customTypeCms.untracked.uint32(1)',
        msg="Error: The first run should be 1")

    # Explicit FirstRun in the mask.
    mcJob["mask"]["FirstRun"] = 5
    explicitRunTweak = PSetTweak()
    WMTweaks.makeJobTweak(mcJob, explicitRunTweak)
    self.assertTrue(
        hasattr(explicitRunTweak.process.source, "firstRun"),
        msg="Error: There should be a first run tweak")
    self.assertEqual(
        explicitRunTweak.process.source.firstRun,
        'customTypeCms.untracked.uint32(5)',
        msg="Error: The first run should be 5")
def testFirstEventMC(self):
    """
    _testFirstEventMC_

    For an MC job (MCFakeFile input):
      * makeJobTweak must raise WMTweakMaskError while the mask has a
        FirstLumi but no FirstEvent;
      * once FirstEvent is set, process.source.firstEvent carries that value
        and no skipEvents tweak is produced.
    """
    job = Job()
    job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    job["mask"] = Mask()
    job["counter"] = 5
    job["mask"]["FirstLumi"] = 200

    # The mask has no FirstEvent yet: the call itself must raise. Asserting
    # directly on the call (with both arguments, like every other call in
    # these tests) fails the test when no exception is raised, instead of
    # hiding the outcome behind a try/except.
    with self.assertRaises(WMTweakMaskError):
        WMTweaks.makeJobTweak(job, PSetTweak())

    job["mask"]["FirstEvent"] = 100
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertFalse(hasattr(tweak.process.source, "skipEvents"),
                     "Error: There should be no skipEvents tweak, it's MC.")
    self.assertTrue(hasattr(tweak.process.source, "firstEvent"),
                    "Error: There should be a first event tweak")
    self.assertEqual(tweak.process.source.firstEvent,
                     'customTypeCms.untracked.uint32(100)',
                     "Error: The firstEvent tweak should be 100.")
    return
def makeThreadsStreamsTweak(self):
    """
    _makeThreadsStreamsTweak_

    Tweak threads and streams parameters.

    Reads numberOfCores (default 1) and eventStreams (default 0) from the
    step's multicore section, lets resizeResources() adjust the core count,
    makes sure process.options exists in the pickled configuration, and
    records numberOfThreads / numberOfStreams on self.tweak.
    """
    multicore = self.step.data.application.multicore
    origCores = int(getattr(multicore, 'numberOfCores', 1))
    eventStreams = int(getattr(multicore, 'eventStreams', 0))

    # resizeResources() updates the dictionary it is given in place.
    resized = {'cores': origCores}
    resizeResources(resized)
    numCores = resized['cores']
    if numCores != origCores:
        self.logger.info(
            "Resizing a job with nStreams != nCores. Setting nStreams = nCores. This may end badly."
        )
        eventStreams = 0

    # Create process.options right away (only if not already set) so the
    # thread/stream parameters recorded below have a PSet to live in.
    optionsTweak = PSetTweak()
    optionsTweak.addParameter("process.options", "customTypeCms.untracked.PSet()")
    self.applyPsetTweak(optionsTweak, skipIfSet=True)

    for paramName, paramValue in (("process.options.numberOfThreads", numCores),
                                  ("process.options.numberOfStreams", eventStreams)):
        self.tweak.addParameter(paramName,
                                "customTypeCms.untracked.uint32(%s)" % paramValue)
    return
def handlePerformanceSettings(self):
    """
    _handlePerformanceSettings_

    Install the standard performance report services (SimpleMemoryCheck,
    CPU, Timing) through an immediately-applied tweak, then record
    process.Timing.summaryOnly on self.tweak.
    """
    servicesTweak = PSetTweak()

    # SimpleMemoryCheck is restricted to job-report output when the step's
    # command section has a truthy 'silentMemoryCheck'.
    silent = getattr(self.step.data.application.command, 'silentMemoryCheck', False)
    if silent:
        memoryCheck = "customTypeCms.Service('SimpleMemoryCheck', jobReportOutputOnly=cms.untracked.bool(True))"
    else:
        memoryCheck = "customTypeCms.Service('SimpleMemoryCheck')"
    servicesTweak.addParameter("process.SimpleMemoryCheck", memoryCheck)

    servicesTweak.addParameter("process.CPU", "customTypeCms.Service('CPU')")
    servicesTweak.addParameter("process.Timing", "customTypeCms.Service('Timing')")
    self.applyPsetTweak(servicesTweak)

    # Recorded on the shared tweak rather than applied here.
    self.tweak.addParameter("process.Timing.summaryOnly",
                            "customTypeCms.untracked(cms.bool(True))")
    return
def __call__(self, process):
    """
    Build a PSetTweak from *process*.

    Every entry of self.processLevel is expanded with expandParameter() and
    each resulting parameter that exists on the process is copied into the
    tweak. For every output module, the parameters named in
    self.outModLevel that exist on the module are copied as
    "process.<module>.<param>"; the module names themselves are collected
    under 'process.outputModules_'.

    Returns the populated PSetTweak.
    """
    tweak = PSetTweak()

    # handle process parameters
    expandedNames = []
    for pattern in self.processLevel:
        expandedNames.extend(expandParameter(process, pattern).keys())
    for paramName in expandedNames:
        if hasParameter(process, paramName):
            tweak.addParameter(paramName, getParameter(process, paramName))

    # output modules
    tweak.addParameter('process.outputModules_', [])
    for outMod in process.outputModules_():
        tweak.getParameter('process.outputModules_').append(outMod)
        moduleRef = getattr(process, outMod)
        for paramName in self.outModLevel:
            if not hasParameter(moduleRef, paramName, True):
                continue
            tweak.addParameter("process.%s.%s" % (outMod, paramName),
                               getParameter(moduleRef, paramName, True))
    return tweak
def applyPsetTweak(self, psetTweak, skipIfSet=False, allowFailedTweaks=False, name='', cleanupTweak=False):
    """
    _applyPsetTweak_

    Persist *psetTweak* as "PSetTweak<name>.json" in the step space and run
    edm_pset_tweak.py through self.scramRun(), reading and writing the same
    pickled configuration (self.configPickle) in the step space.

    :param psetTweak: tweak object providing persist(path, formatting=...)
    :param skipIfSet: append --skip_if_set to the command
    :param allowFailedTweaks: append --allow_failed_tweaks to the command
    :param name: suffix inserted into the JSON file name
    :param cleanupTweak: see the review note at the end of the body
    """
    workDir = self.stepSpace.location
    jsonPath = os.path.join(workDir, "PSetTweak%s.json" % name)
    psetTweak.persist(jsonPath, formatting='simplejson')

    # Input and output pickle are the same file: the tweak is applied in place.
    picklePath = os.path.join(workDir, self.configPickle)
    cmdParts = ["edm_pset_tweak.py",
                "--input_pkl", picklePath,
                "--output_pkl", picklePath,
                "--json", jsonPath]
    if skipIfSet:
        cmdParts.append("--skip_if_set")
    if allowFailedTweaks:
        cmdParts.append("--allow_failed_tweaks")
    self.scramRun(" ".join(cmdParts))

    # NOTE(review): this only rebinds the local name; the object held by the
    # caller is left untouched, so callers still see the parameters they
    # added before this call.
    if cleanupTweak is True:
        psetTweak = PSetTweak()
    return
def testFirstRun(self):
    """
    _testFirstRun_

    For a job with a real (non-MCFakeFile) input file, verify that
    process.source.firstRun is absent while the mask has no FirstRun and is
    set to the mask value once FirstRun is given.
    """
    job = Job()
    job["input_files"] = [{"lfn": "bogusFile", "parents": []}]
    job["mask"] = Mask()

    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertFalse(hasattr(tweak.process.source, "firstRun"),
                     "Error: There should be no firstRun tweak.")

    job["mask"]["FirstRun"] = 93
    # Start from a fresh tweak, filled in place like in the sibling tests, so
    # this stage neither inherits parameters from the first call nor depends
    # on what makeJobTweak returns.
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                    "Error: There should be a firstRun tweak.")
    self.assertEqual(tweak.process.source.firstRun,
                     'customTypeCms.untracked.uint32(93)',
                     "Error: The firstRun tweak should be 93.")
    return
def __init__(self, crabPSet=False):
    """
    Initialise the base ScriptInterface and set up the instance state.

    :param crabPSet: flag stored as-is on the instance (defaults to False)
    """
    ScriptInterface.__init__(self)

    # Caller-supplied flag.
    self.crabPSet = crabPSet

    # Fixed defaults: root logger, an empty tweak to accumulate parameters,
    # and the name of the pickled configuration file.
    self.logger = logging.getLogger()
    self.tweak = PSetTweak()
    self.configPickle = "Pset.pkl"

    # These all start out unset.
    self.process = None
    self.jobBag = None
    self.scram = None
    self.psetFile = None
def applyTweak(self, psetTweak):
    """
    _applyTweak_

    Load a persisted tweak and apply it to self.process.

    :param psetTweak: handed straight to PSetTweak.unpersist()
        (presumably the location of a persisted tweak; confirm with callers)
    """
    loadedTweak = PSetTweak()
    loadedTweak.unpersist(psetTweak)
    # Delegates to the applyTweak function, passing the fixup dictionary along.
    applyTweak(self.process, loadedTweak, self.fixupDict)
    return
def testFirstLumiMC(self):
    """
    _testFirstLumiMC_

    For an MC job (MCFakeFile input):
      * makeJobTweak must raise WMTweakMaskError while the mask has a
        FirstEvent but no FirstLumi;
      * once FirstLumi is set, process.source.firstLuminosityBlock carries
        that value, and follows the mask when the value changes.
    """
    job = Job()
    job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
    job["mask"] = Mask()
    job["counter"] = 5
    job["mask"]["FirstEvent"] = 100

    # The mask has no FirstLumi yet: the call itself must raise. Asserting
    # directly on the call (with both arguments, like every other call in
    # these tests) fails the test when no exception is raised, instead of
    # hiding the outcome behind a try/except.
    with self.assertRaises(WMTweakMaskError):
        WMTweaks.makeJobTweak(job, PSetTweak())

    job["mask"]["FirstLumi"] = 200
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                    "Error: There should be a first lumi tweak")
    self.assertEqual(tweak.process.source.firstLuminosityBlock,
                     'customTypeCms.untracked.uint32(200)',
                     "Error: The first luminosity block should be 200")

    job["mask"]["FirstLumi"] = 10
    tweak = PSetTweak()
    WMTweaks.makeJobTweak(job, tweak)
    self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                    "Error: There should be a first lumi tweak")
    self.assertEqual(tweak.process.source.firstLuminosityBlock,
                     'customTypeCms.untracked.uint32(10)',
                     "Error: The first luminosity block should be 10")
def testC(self):
    """
    Build a tweak from the seeds: register four initialSeed baggage
    parameters, run AutomaticSeeding on the job, copy every service entry's
    initialSeed into a PSetTweak and print it.
    """
    job = Job("TestJob")
    seeder = AutomaticSeeding()

    presetSeeds = (
        ("seed1", 123445),
        ("seed2", 123445),
        ("seed3", 7464738),
        ("seed44", 98273762),
    )
    for seedName, seedValue in presetSeeds:
        job.addBaggageParameter(
            "process.RandomNumberGeneratorService.%s.initialSeed" % seedName, seedValue)

    seeder(job)

    tweak = PSetTweak()
    for entry in job.baggage.process.RandomNumberGeneratorService:
        tweak.addParameter(
            "process.RandomNumberGeneratorService.%s.initialSeed" % entry._internal_name,
            entry.initialSeed)
    print(tweak)
def makeTaskTweak(stepSection):
    """
    _makeTaskTweak_

    Create a tweak for options in the task that apply to all jobs.

    Only the GlobalTag settings are handled: when the step section carries
    application.configuration.pickledarguments, the unpickled dictionary's
    'globalTag' and 'globalTagTransaction' entries (if present) are copied
    into the tweak. Returns the (possibly empty) PSetTweak.
    """
    result = PSetTweak()

    # GlobalTag: bail out with the empty tweak as soon as a level is missing.
    if not hasattr(stepSection, "application"):
        return result
    if not hasattr(stepSection.application, "configuration"):
        return result
    configuration = stepSection.application.configuration
    if not hasattr(configuration, "pickledarguments"):
        return result

    # NOTE(review): pickle.loads executes whatever the payload asks for; this
    # is only safe as long as pickledarguments comes from a trusted source.
    args = pickle.loads(configuration.pickledarguments)
    if 'globalTag' in args:
        result.addParameter("process.GlobalTag.globaltag", args['globalTag'])
    if 'globalTagTransaction' in args:
        result.addParameter("process.GlobalTag.DBParameters.transactionId",
                            args['globalTagTransaction'])
    return result
def fixupProcess(self):
    """
    _fixupProcess_

    Look over the process object and make sure that all of the attributes
    that we expect to exist actually exist.

    For each output module the following parameters are created (only when
    not already set) in the pickled configuration:
        fileName
        logicalFileName
        dataset.dataTier
        dataset.filterName

    Raises AttributeError when the output modules cannot be found on the
    process through any of the known attributes.
    """
    if hasattr(self.process, "outputModules"):
        outputModuleNames = list(self.process.outputModules)
    elif hasattr(self.process, "outputModules_"):
        outputModuleNames = self.process.outputModules_()
    elif hasattr(self.process, "_Process__outputmodules"):
        outputModuleNames = list(self.process._Process__outputmodules)
    else:
        msg = "Error loading output modules from process"
        raise AttributeError(msg)

    for outMod in outputModuleNames:
        self.logger.info("DEBUG output module = %s", outMod)

        # Stage 1: the containing PSets. The dataset PSet is created with
        # empty dataTier and filterName strings.
        containerTweak = PSetTweak()
        containerTweak.addParameter("process.options", "customTypeCms.untracked.PSet()")
        containerTweak.addParameter(
            "process.%s.dataset" % outMod,
            "customTypeCms.untracked.PSet(dataTier=cms.untracked.string(''), filterName=cms.untracked.string(''))"
        )
        self.applyPsetTweak(containerTweak, skipIfSet=True)

        # Stage 2: the file name parameters. A separate tweak object is used
        # because applyPsetTweak cannot empty the object held by its caller;
        # reusing the stage-1 tweak would send its parameters a second time.
        nameTweak = PSetTweak()
        nameTweak.addParameter("process.%s.fileName" % outMod,
                               "customTypeCms.untracked.string('')")
        nameTweak.addParameter("process.%s.logicalFileName" % outMod,
                               "customTypeCms.untracked.string('')")
        self.applyPsetTweak(nameTweak, skipIfSet=True)
    return
def handleSeeding(self):
    """
    _handleSeeding_

    Handle Random Seed settings for the job.

    With "ReproducibleSeeding" in the job baggage, the initialSeed of every
    entry of the process' RandomNumberGeneratorService is applied to the
    process through a tweak. Otherwise, if the process has a
    RandomNumberGeneratorService, it is populated through CMSSW's
    RandomNumberServiceHelper.
    """
    seedingMode = getattr(self.job.getBaggage(), "seeding", None)

    if seedingMode == "ReproducibleSeeding":
        seedTweak = PSetTweak()
        for service in self.process.RandomNumberGeneratorService:
            seedTweak.addParameter(
                "process.RandomNumberGeneratorService.%s.initialSeed" % service._internal_name,
                service.initialSeed)
        applyTweak(self.process, seedTweak, self.fixupDict)
        return

    if not hasattr(self.process, "RandomNumberGeneratorService"):
        return

    # Imported here because it is only needed on this path.
    from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
    RandomNumberServiceHelper(self.process.RandomNumberGeneratorService).populate()
    return
def makeOutputTweak(outMod, job):
    """
    _makeOutputTweak_

    Make a PSetTweak for the output module and job instance provided.

    The tweak always sets process.<module>.fileName to "<module>.root".
    process.<module>.logicalFileName is set only when the output module has
    an lfnBase, to "<lfnBase>/<lfnGroup(job)>/<module>.root".

    :param outMod: output module section; its _internal_name is the module name
    :param job: job instance handed to lfnGroup()
    :returns: the populated PSetTweak
    """
    result = PSetTweak()

    # output filenames
    modName = str(getattr(outMod, "_internal_name"))
    result.addParameter("process.%s.fileName" % modName, "%s.root" % modName)

    # Test for a missing lfnBase BEFORE converting to str: str(None) is the
    # non-empty string "None", which would otherwise always pass the check
    # and produce an LFN starting with "None/".
    lfnBase = getattr(outMod, "lfnBase", None)
    if lfnBase is not None:
        lfn = "%s/%s/%s.root" % (str(lfnBase), lfnGroup(job), modName)
        result.addParameter("process.%s.logicalFileName" % modName, lfn)

    # TODO: Nice standard way to meddle with the other parameters in the
    #       output module based on the settings in the section
    return result
def createScriptLines(opts, pklIn):
    """
    Prepare a bash script fragment which tweaks the PSet parameters
    according to opts.

    :param opts: parsed options; the attributes read here are runAndLumis,
        inputFile, firstEvent, lastEvent, firstLumi, firstRun, eventsPerLumi,
        maxRuntime, oneEventMode and lheInputFiles. opts.lastEvent is reset
        to None when both firstEvent and lastEvent are given.
    :param pklIn: name of the pickled PSet the generated commands work on
    :returns: the script lines, as built by createTweakingCommandLines()

    Side effects: writes "PSetTweak.json" in the current directory and
    prints the number of primary and secondary input files.
    """
    runAndLumis = {}
    if opts.runAndLumis:
        runAndLumis = readFileFromTarball(opts.runAndLumis, 'run_and_lumis.tar.gz')
    inputFiles = {}
    if opts.inputFile:
        inputFiles = readFileFromTarball(opts.inputFile, 'input_files.tar.gz')

    # build a tweak object with the needed changes to be applied to PSet
    tweak = PSetTweak()

    # add tweaks

    # inputFile will always be present
    # inputFile can have three formats depending on whether secondary input files are used:
    # 1. a single LFN as a string : "/store/.....root"
    # 2. a list of LFNs : ["/store/.....root", "/store/....root", ...]
    # 3. a list of dictionaries (one per file) with keys: 'lfn' and 'parents'
    #    value for 'lfn' is a string, value for 'parents' is a list of {'lfn':lfn} dictionaries
    #    [{'lfn':inputlfn, 'parents':[{'lfn':parentlfn1},{'lfn':parentlfn2}], ....]},...]
    # to properly prepare the tweak we reuse code from WMTweak.py:
    # https://github.com/dmwm/WMCore/blob/bb573b442a53717057c169b05ae4fae98f31063b/src/python/PSetTweaks/WMTweak.py#L415-L441
    primaryFiles = []
    secondaryFiles = []
    for inputFile in inputFiles:
        # make sure input is always in format 3.
        if not isinstance(inputFile, dict):
            inputFile = {'lfn': inputFile, 'parents': []}
        if inputFile["lfn"].startswith("MCFakeFile"):
            # for MC which uses "EmptySource" there must be no inputFile
            continue
        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])
    print("Adding %d files to 'fileNames' attr" % len(primaryFiles))
    print("Adding %d files to 'secondaryFileNames' attr" % len(secondaryFiles))
    if len(primaryFiles) > 0:
        tweak.addParameter("process.source.fileNames",
                           "customTypeCms.untracked.vstring(%s)" % primaryFiles)
        if len(secondaryFiles) > 0:
            tweak.addParameter("process.source.secondaryFileNames",
                               "customTypeCms.untracked.vstring(%s)" % secondaryFiles)

    # for rearranging runsAndLumis into the structure needed by CMSSW, reuse code taken from
    # https://github.com/dmwm/WMCore/blob/bb573b442a53717057c169b05ae4fae98f31063b/src/python/PSetTweaks/WMTweak.py#L482
    if runAndLumis:
        lumisToProcess = []
        for run, lumiPairs in runAndLumis.items():
            for lumiPair in lumiPairs:
                if len(lumiPair) != 2:
                    # Do nothing
                    continue
                lumisToProcess.append("%s:%s-%s:%s" % (run, lumiPair[0], run, lumiPair[1]))
        tweak.addParameter("process.source.lumisToProcess",
                           "customTypeCms.untracked.VLuminosityBlockRange(%s)" % lumisToProcess)

    # how many events to process
    if opts.firstEvent:
        tweak.addParameter("process.source.firstEvent",
                           "customTypeCms.untracked.uint32(%s)" % opts.firstEvent)
    if opts.firstEvent is None or opts.lastEvent is None:
        # what to process is defined in runAndLumis, we do not split by events here
        maxEvents = -1
    else:
        # for MC CRAB passes 1st/last event, but cmsRun wants 1st ev + MaxEvents
        maxEvents = int(opts.lastEvent) - int(opts.firstEvent) + 1
        opts.lastEvent = None  # for MC there has to be no lastEvent
    tweak.addParameter("process.maxEvents.input",
                       "customTypeCms.untracked.int32(%s)" % maxEvents)

    if opts.lastEvent:
        tweak.addParameter("process.source.lastEvent",
                           "customTypeCms.untracked.uint32(%s)" % opts.lastEvent)

    # firstLumi, firstRun and eventsPerLumi are used for MC
    if opts.firstLumi:
        tweak.addParameter("process.source.firstLuminosityBlock",
                           "customTypeCms.untracked.uint32(%s)" % opts.firstLumi)
    if opts.firstRun:
        tweak.addParameter("process.source.firstRun",
                           "customTypeCms.untracked.uint32(%s)" % opts.firstRun)
    if opts.eventsPerLumi:
        numberEventsInLuminosityBlock = "customTypeCms.untracked.uint32(%s)" % opts.eventsPerLumi
        tweak.addParameter("process.source.numberEventsInLuminosityBlock",
                           numberEventsInLuminosityBlock)

    # time-limited running is used by automatic splitting probe jobs
    if opts.maxRuntime:
        maxSecondsUntilRampdown = "customTypeCms.untracked.int32(%s)" % opts.maxRuntime
        tweak.addParameter("process.maxSecondsUntilRampdown.input", maxSecondsUntilRampdown)

    # event limiter for testing
    if opts.oneEventMode in ["1", "True", True]:
        tweak.addParameter("process.maxEvents.input", "customTypeCms.untracked.int32(1)")

    # make sure that FJR contains useful statistics, reuse code from
    # https://github.com/dmwm/WMCore/blob/c2fa70af3b4c5285d50e6a8bf48636232f738340/src/python/WMCore/WMRuntime/Scripts/SetupCMSSWPset.py#L289-L307
    tweak.addParameter("process.CPU", "customTypeCms.Service('CPU')")
    tweak.addParameter(
        "process.Timing",
        "customTypeCms.Service('Timing', summaryOnly=cms.untracked.bool(True))"
    )
    tweak.addParameter(
        "process.SimpleMemoryCheck",
        "customTypeCms.Service('SimpleMemoryCheck', jobReportOutputOnly=cms.untracked.bool(True))"
    )

    # tweak !
    psetTweakJson = "PSetTweak.json"
    tweak.persist(psetTweakJson, formatting='simplejson')

    procScript = "edm_pset_tweak.py"
    pklOut = pklIn + '-tweaked'
    # we always create untracked psets in our tweaks
    cmd = "%s --input_pkl %s --output_pkl %s --json %s --create_untracked_psets" % (
        procScript, pklIn, pklOut, psetTweakJson)
    commandLines = createTweakingCommandLines(cmd, pklIn, pklOut)

    # there are a few more things to do which require running different EDM/CMSSW commands
    # 1. enable LazyDownload of LHE files (if needed)
    if opts.lheInputFiles:
        pklOut = pklIn + '-lazy'
        procScript = "cmssw_enable_lazy_download.py"
        cmd = "%s --input_pkl %s --output_pkl %s" % (procScript, pklIn, pklOut)
        moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
        commandLines += moreLines

    # 2. make sure random seeds are initialized
    pklOut = pklIn + '-seeds'
    procScript = "cmssw_handle_random_seeds.py"
    cmd = "%s --input_pkl %s --output_pkl %s --seeding dummy" % (procScript, pklIn, pklOut)
    # Plain assignment: moreLines is unbound here when step 1 was skipped, and
    # appending to it when step 1 ran would emit the lazy-download lines twice.
    moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
    commandLines += moreLines

    # 3. make sure that process.maxEvents.input is propagated to Producers, see:
    # https://github.com/dmwm/WMCore/blob/85d6d423f0a85fdedf78b65ca8b7b81af9263789/src/python/WMCore/WMRuntime/Scripts/SetupCMSSWPset.py#L448-L465
    pklOut = pklIn + '-nEvents'
    procScript = 'cmssw_handle_nEvents.py'
    cmd = "%s --input_pkl %s --output_pkl %s" % (procScript, pklIn, pklOut)
    moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
    commandLines += moreLines

    return commandLines
def makeJobTweak(job):
    """
    _makeJobTweak_

    Convert information from a WMBS Job object into a PSetTweak that can be
    used to modify a CMSSW process.

    The tweak covers: input and secondary input files, firstLuminosityBlock
    and firstEvent for jobs whose input is an MCFakeFile, maxEvents,
    skipEvents, firstRun, lumisToProcess and any per-job baggage settings
    found under baggage.process.

    :param job: job providing getBaggage(), job["input_files"] and job["mask"]
    :returns: the populated PSetTweak
    :raises WMTweakMaskError: when an MCFakeFile input has no FirstLumi in
        the mask, or when a job without primary files (and without LHE
        input) has no FirstEvent in the mask
    """
    result = PSetTweak()
    baggage = job.getBaggage()

    # Check in the baggage if we are processing .lhe files
    lheInput = getattr(baggage, "lheInputFiles", False)

    # Input files and secondary input files.
    primaryFiles = []
    secondaryFiles = []
    for inputFile in job["input_files"]:
        if inputFile["lfn"].startswith("MCFakeFile"):
            # If there is a preset lumi in the mask, use it as the first
            # luminosity setting
            if job['mask'].get('FirstLumi', None) is not None:
                result.addParameter("process.source.firstLuminosityBlock",
                                    job['mask']['FirstLumi'])
            else:
                # We don't have lumi information in the mask, raise an exception
                raise WMTweakMaskError(job['mask'],
                                       "No first lumi information provided")
            continue

        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])

    if primaryFiles:
        result.addParameter("process.source.fileNames", primaryFiles)
        if secondaryFiles:
            result.addParameter("process.source.secondaryFileNames", secondaryFiles)
    elif not lheInput:
        # First event parameter should be set from whatever the mask says,
        # there is no fallback: a missing FirstEvent is an error.
        if job['mask'].get('FirstEvent', None) is not None:
            result.addParameter("process.source.firstEvent",
                                job['mask']['FirstEvent'])
        else:
            # No first event information in the mask, raise an error
            raise WMTweakMaskError(job['mask'],
                                   "No first event information provided in the mask")

    mask = job['mask']

    # event limits
    maxEvents = mask.getMaxEvents()
    if maxEvents is None:
        maxEvents = -1
    result.addParameter("process.maxEvents.input", maxEvents)

    # We don't want to set skip events for MonteCarlo jobs which have
    # no input files.
    firstEvent = mask['FirstEvent']
    if firstEvent is not None and firstEvent >= 0 and (primaryFiles or lheInput):
        if lheInput:
            # LHE input skips one event less than the mask's FirstEvent.
            result.addParameter("process.source.skipEvents", firstEvent - 1)
        else:
            result.addParameter("process.source.skipEvents", firstEvent)

    firstRun = mask['FirstRun']
    if firstRun is not None:
        result.addParameter("process.source.firstRun", firstRun)
    elif not primaryFiles:
        # Then we have a MC job, we need to set firstRun to 1
        logging.debug("MCFakeFile initiated without job FirstRun - using one.")
        result.addParameter("process.source.firstRun", 1)

    runs = mask.getRunAndLumis()
    lumisToProcess = []
    for run, lumiPairs in runs.items():
        for lumiPair in lumiPairs:
            if len(lumiPair) != 2:
                # Do nothing
                continue
            lumisToProcess.append("%s:%s-%s:%s" % (run, lumiPair[0], run, lumiPair[1]))

    if lumisToProcess:
        result.addParameter("process.source.lumisToProcess", lumisToProcess)

    # install any settings from the per job baggage
    procSection = getattr(baggage, "process", None)
    if procSection is None:
        return result

    baggageParams = decomposeConfigSection(procSection)
    for k, v in baggageParams.items():
        result.addParameter(k, v)

    return result