def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 100000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    # Request-supplied skim arguments take precedence; otherwise keep the
    # values computed above. (The original line overwrote the computed
    # values unconditionally, which made the block above dead code.)
    if not self.skimJobSplitArgs:
        self.skimJobSplitArgs = {"files_per_job": 1, "include_parents": True}
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", self.skimJobSplitArgs)

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)
    self.procConfigCacheID = arguments.get("ProcConfigCacheID")

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 1})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    self.workload.setDashboardActivity("harvesting")
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    self.workload.setWorkQueueSplitPolicy("Dataset", "FileBased", {"files_per_job": 99999})

    # also creates the logCollect job by default
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()
    self.arguments = arguments
    self.couchURL = arguments['CouchURL']
    self.couchDBName = arguments['CouchDBName']
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments.get("GlobalTag", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    numTasks = arguments['TaskChain']
    for i in range(1, numTasks + 1):
        # consistency check that there are numTasks defined in the request:
        if "Task%s" % i not in arguments:
            msg = "Specified number of tasks: %s does not match defined task dictionary for Task%s" % (i, i)
            raise RuntimeError(msg)

        taskConf = getTaskN(arguments, i)
        parent = parentTaskName(taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf.get("BlockBlacklist", [])
        self.blockWhitelist = taskConf.get("BlockWhitelist", [])
        self.runBlacklist = taskConf.get("RunBlacklist", [])
        self.runWhitelist = taskConf.get("RunWhitelist", [])

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    return self.workload
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for an Analysis request
    """
    StdBase.__call__(self, workloadName, arguments)
    self.minMergeSize = 1

    if self.Lumis and self.analysisJobSplitAlgo not in ['LumiBased']:
        raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' % 'LumiBased')

    if self.analysisJobSplitAlgo == 'EventBased':
        self.analysisJobSplitArgs = {'events_per_job': self.eventsPerJob}
    elif self.analysisJobSplitAlgo == 'LumiBased':
        self.analysisJobSplitArgs = {'lumis_per_job': self.lumisPerJob}
        if self.Lumis:
            self.analysisJobSplitArgs.update({'lumis': self.Lumis})
            self.analysisJobSplitArgs.update({'runs': self.Runs})
        self.analysisJobSplitArgs.update({'halt_job_on_file_boundaries': False,
                                          'splitOnRun': False})

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a Repack workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.outputs = arguments['Outputs']

    # job splitting parameters
    self.repackSplitArgs = {}
    self.repackSplitArgs['maxSizeSingleLumi'] = arguments['MaxSizeSingleLumi']
    self.repackSplitArgs['maxSizeMultiLumi'] = arguments['MaxSizeMultiLumi']
    self.repackSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.repackSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.repackSplitArgs['maxLatency'] = arguments['MaxLatency']
    self.repackMergeSplitArgs = {}
    self.repackMergeSplitArgs['minInputSize'] = arguments['MinInputSize']
    self.repackMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
    self.repackMergeSplitArgs['maxEdmSize'] = arguments['MaxEdmSize']
    self.repackMergeSplitArgs['maxOverSize'] = arguments['MaxOverSize']
    self.repackMergeSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.repackMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.repackMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    self.workload.setDashboardActivity("harvesting")
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    splitArgs = {"runs_per_job": 1}
    if self.dqmHarvestUnit == "multiRun":
        # then it should result in a single job in the end, very high number of runs
        splitArgs['runs_per_job'] = 999999
    self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

    # also creates the logCollect job by default
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    for i in range(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v

        parent = taskConf.get("InputTask", None)

        self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    self.workload.ignoreOutputModules(self.ignoredOutputModules)

    return self.workload
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration
    taskConf = {}
    for k, v in arguments["Step1"].items():
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

    self.inputPrimaryDataset = self.getStepValue('PrimaryDataset', taskConf, self.primaryDataset)
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    self.workload.setStepMapping(self.stepMapping)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Step1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    for i in range(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v

        parent = taskConf.get("InputTask", None)

        self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    self.workload.ignoreOutputModules(self.ignoredOutputModules)

    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptReco workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments['CMSSWVersion']
    self.globalTag = arguments['GlobalTag']
    self.writeTiers = arguments['WriteTiers']
    self.alcaSkims = arguments['AlcaSkims']
    self.inputDataset = arguments['InputDataset']
    self.promptSkims = arguments['PromptSkims']
    self.couchURL = arguments['CouchURL']
    self.couchDBName = arguments['CouchDBName']
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
    self.initCommand = arguments['InitCommand']

    # Optional parameters
    self.envPath = arguments.get('EnvPath', None)
    self.binPath = arguments.get('BinPath', None)

    if 'Multicore' in arguments:
        numCores = arguments.get('Multicore')
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Do we run log collect ? (Tier0 does not support it yet)
    self.doLogCollect = arguments.get("DoLogCollect", True)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "EventBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"events_per_job": 500})
    self.skimJobSplitAlgo = arguments.get("SkimJobSplitAlgo", "FileBased")
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {"files_per_job": 1,
                                                               "include_parents": True})

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DataProcessing workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # One of these parameters must be set.
    if "ProdConfigCacheID" in arguments:
        self.procConfigCacheID = arguments["ProdConfigCacheID"]
    else:
        self.procConfigCacheID = arguments.get("ProcConfigCacheID", None)

    if "Scenario" in arguments:
        self.procScenario = arguments.get("Scenario", None)
    else:
        self.procScenario = arguments.get("ProcScenario", None)

    if "Multicore" in arguments:
        numCores = arguments.get("Multicore")
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 8,
                                                              "include_parents": self.includeParents})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for a MonteCarlo request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputPrimaryDataset = arguments["PrimaryDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.seeding = arguments.get("Seeding", "AutomaticSeeding")
    self.configCacheID = arguments["ConfigCacheID"]

    # Splitting arguments
    timePerEvent = int(arguments.get("TimePerEvent", 60))
    filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
    totalTime = int(arguments.get("TotalTime", 9 * 3600))
    self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)
    self.firstEvent = int(arguments.get("FirstEvent", 1))
    self.firstLumi = int(arguments.get("FirstLumi", 1))

    # We don't write out every event in MC, adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * filterEfficiency

    # pileup configuration for the first generation task
    self.pileupConfig = arguments.get("PileupConfig", None)

    # Events per lumi configuration (Allow others to inherit)
    self.eventsPerLumi = arguments.get("EventsPerLumi", None)
    if self.eventsPerLumi is not None:
        self.eventsPerLumi = int(self.eventsPerLumi)

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    eventsPerJob = int(totalTime / timePerEvent / filterEfficiency)
    self.prodJobSplitAlgo = arguments.get("ProdJobSplitAlgo", "EventBased")
    self.prodJobSplitArgs = arguments.get("ProdJobSplitArgs", {"events_per_job": eventsPerJob})

    self.previousJobCount = 0
    if self.firstEvent > 1 or self.firstLumi > 1:
        self.previousJobCount = int(math.ceil(self.firstEvent / float(self.prodJobSplitArgs["events_per_job"])))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
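# A worked example of the splitting defaults above (request values assumed
# for illustration): TimePerEvent=60s, TotalTime=9h (32400s) and
# FilterEfficiency=1.0 give events_per_job = int(32400 / 60 / 1.0) = 540.
# Extending such a request from FirstEvent=54001 sets
# initial_lfn_counter = ceil(54001 / 540.0) = 101, so new output LFNs start
# after the 100 jobs already produced.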
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration
    taskConf = {}
    for k, v in arguments["Step1"].items():
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

    if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
        self.multicoreNCores = int(taskConf['Multicore'])

    self.inputPrimaryDataset = taskConf.get("PrimaryDataset", self.primaryDataset)
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    # All tasks need to have this parameter set
    self.workload.setTaskPropertiesFromWorkload()

    return self.workload
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration
    taskConf = {}
    for k, v in arguments["Step1"].items():
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

    self.inputPrimaryDataset = self.getStepValue('PrimaryDataset', taskConf, self.primaryDataset)
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    self.workload.setStepMapping(self.stepMapping)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Step1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
    return self.workload
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for a MonteCarlo request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputPrimaryDataset = arguments["PrimaryDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.seeding = arguments.get("Seeding", "AutomaticSeeding")
    self.configCacheID = arguments["ConfigCacheID"]

    # Splitting arguments
    timePerEvent = int(arguments.get("TimePerEvent", 60))
    filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
    totalTime = int(arguments.get("TotalTime", 9 * 3600))
    self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)
    self.firstEvent = int(arguments.get("FirstEvent", 1))
    self.firstLumi = int(arguments.get("FirstLumi", 1))

    # pileup configuration for the first generation task
    self.pileupConfig = arguments.get("PileupConfig", None)

    # Events per lumi configuration (Allow others to inherit)
    self.eventsPerLumi = arguments.get("EventsPerLumi", None)
    if self.eventsPerLumi is not None:
        self.eventsPerLumi = int(self.eventsPerLumi)

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    eventsPerJob = int(totalTime / timePerEvent / filterEfficiency)
    self.prodJobSplitAlgo = arguments.get("ProdJobSplitAlgo", "EventBased")
    self.prodJobSplitArgs = arguments.get("ProdJobSplitArgs", {"events_per_job": eventsPerJob})

    self.previousJobCount = 0
    if self.firstEvent > 1 or self.firstLumi > 1:
        self.previousJobCount = int(math.ceil(self.firstEvent / float(self.prodJobSplitArgs["events_per_job"])))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create an Express workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.globalTagTransaction = arguments["GlobalTagTransaction"]
    self.procScenario = arguments['ProcScenario']
    self.alcaSkims = arguments['AlcaSkims']
    self.dqmSequences = arguments['DqmSequences']
    self.outputs = arguments['Outputs']
    self.dqmUploadProxy = arguments['DQMUploadProxy']
    self.alcaHarvestTimeout = arguments['AlcaHarvestTimeout']
    self.alcaHarvestDir = arguments['AlcaHarvestDir']
    self.streamName = arguments['StreamName']

    # job splitting parameters (also required parameters)
    self.expressSplitArgs = {}
    self.expressSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.expressMergeSplitArgs = {}
    self.expressMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
    self.expressMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.expressMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

    if "Multicore" in arguments:
        numCores = arguments.get("Multicore")
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # fixed parameters that are used in various places
    self.alcaHarvestOutLabel = "Sqlite"

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a Repack workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.procScenario = arguments['ProcScenario']
    self.outputs = arguments['Outputs']

    # crashes if this isn't set
    self.globalTag = "NOTSET"

    # job splitting parameters
    self.repackSplitArgs = {}
    self.repackSplitArgs['maxSizeSingleLumi'] = arguments['MaxSizeSingleLumi']
    self.repackSplitArgs['maxSizeMultiLumi'] = arguments['MaxSizeMultiLumi']
    self.repackSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.repackSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.repackMergeSplitArgs = {}
    self.repackMergeSplitArgs['minInputSize'] = arguments['MinInputSize']
    self.repackMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
    self.repackMergeSplitArgs['maxEdmSize'] = arguments['MaxEdmSize']
    self.repackMergeSplitArgs['maxOverSize'] = arguments['MaxOverSize']
    self.repackMergeSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.repackMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']

    if "Multicore" in arguments:
        numCores = arguments.get("Multicore")
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration
    taskConf = {}
    for k, v in arguments["Step1"].items():
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

    if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
        self.multicoreNCores = int(taskConf['Multicore'])

    self.inputPrimaryDataset = taskConf.get("PrimaryDataset", self.primaryDataset)
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    # All tasks need to have this parameter set
    self.workload.setTaskPropertiesFromWorkload()

    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReDigi workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Pull down the configs and the names of the output modules so that
    # we can chain things together properly.
    self.stepOneOutputModuleName = arguments.get("StepOneOutputModuleName", None)
    self.stepTwoOutputModuleName = arguments.get("StepTwoOutputModuleName")
    self.stepOneConfigCacheID = arguments.get("StepOneConfigCacheID")
    self.stepTwoConfigCacheID = arguments.get("StepTwoConfigCacheID", None)
    self.stepThreeConfigCacheID = arguments.get("StepThreeConfigCacheID")
    self.keepStepOneOutput = arguments.get("KeepStepOneOutput", True)
    self.keepStepTwoOutput = arguments.get("KeepStepTwoOutput", True)

    # Pileup configuration for the first generation task
    self.pileupConfig = arguments.get("PileupConfig", None)

    # Optional arguments that default to something reasonable.
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 8,
                                                              "include_parents": self.includeParents})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    StdBase.__call__(self, workloadName, arguments)

    # Handle the default of the various splitting algorithms
    self.procJobSplitArgs = {"include_parents": self.includeParents}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    return
def __call__(self, workloadName, arguments):
    StdBase.__call__(self, workloadName, arguments)

    # Handle the default of the various splitting algorithms
    self.procJobSplitArgs = {"include_parents": self.includeParents}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    return
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DataProcessing workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # Get the ConfigCacheID
    self.configCacheID = arguments.get("ConfigCacheID", None)
    # or alternatively CouchURL part can be replaced by ConfigCacheUrl,
    # then ConfigCacheUrl + CouchDBName + ConfigCacheID
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional output modules that will not be merged but may be used by subsequent steps
    self.transientModules = arguments.get("TransientOutputModules", [])

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 8,
                                                              "include_parents": self.includeParents})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(self.eventsPerJob,
                                                                       self.eventsPerLumi,
                                                                       self.timePerEvent)
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.prodJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        self.previousJobCount = int(math.ceil((self.firstEvent - 1) / self.eventsPerJob))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    # Feed values back to save in couch
    arguments['EventsPerJob'] = self.eventsPerJob
    return self.buildWorkload()
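# A minimal sketch of the 8h-job default that calcEvtsPerJobLumi presumably
# applies here. Its body is not shown in this file; the formula below is
# assumed from the inline variants used by the other specs:
#
#   events_per_job = int((8.0 * 3600.0) / timePerEvent)
#
# e.g. timePerEvent = 60s -> 480 events per job; timePerEvent = 10s -> 2880.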
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DataProcessing workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # DataProcessing is split by block and can receive more blocks after first split for certain delay
    self.openRunningTimeout = int(arguments.get("OpenRunningTimeout", 0))

    # Get the ConfigCacheID
    self.configCacheID = arguments.get("ConfigCacheID", None)
    # or alternatively CouchURL part can be replaced by ConfigCacheUrl,
    # then ConfigCacheUrl + CouchDBName + ConfigCacheID
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional output modules that will not be merged but may be used by subsequent steps
    self.transientModules = arguments.get("TransientOutputModules", [])

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 8,
                                                              "include_parents": self.includeParents})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    StdBase.__call__(self, workloadName, arguments)

    # Handle the default of the various splitting algorithms
    self.procJobSplitArgs = {"include_parents": self.includeParents}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        arguments['EventsPerJob'] = self.eventsPerJob
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    return
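# For EventAwareLumiBased splitting the variant above additionally caps the
# job wall-clock time: job_time_limit = 48 * 3600 = 172800 seconds (2 days),
# on top of the 8h-based default, e.g. timePerEvent = 12s gives
# events_per_job = int(28800 / 12) = 2400.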
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 100000
            self.procJobSplitArgs["capJobTime"] = 47 * 3600
            if self.multicore:
                self.procJobSplitArgs["capJobDisk"] = max(self.multicoreNCores * 20000000, 80000000)
            else:
                self.procJobSplitArgs["capJobDisk"] = 80000000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    # Request-supplied skim arguments take precedence; otherwise keep the
    # values computed above (the original overwrote them unconditionally).
    if not self.skimJobSplitArgs:
        self.skimJobSplitArgs = {"files_per_job": 1, "include_parents": True}
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", self.skimJobSplitArgs)

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(self.eventsPerJob,
                                                                       self.eventsPerLumi,
                                                                       self.timePerEvent)
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.prodJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        self.previousJobCount = int(math.ceil((self.firstEvent - 1) / self.eventsPerJob))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    # Feed values back to save in couch
    arguments['EventsPerJob'] = self.eventsPerJob
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments['CMSSWVersion']
    self.globalTag = arguments['GlobalTag']
    self.procScenario = arguments['ProcScenario']
    self.writeTiers = arguments['WriteTiers']
    self.alcaSkims = arguments['AlcaSkims']
    self.inputDataset = arguments['InputDataset']

    if 'Multicore' in arguments:
        numCores = arguments.get('Multicore')
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "FileBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for a MonteCarlo request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputPrimaryDataset = arguments["PrimaryDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.seeding = arguments.get("Seeding", "AutomaticSeeding")
    self.prodConfigCacheID = arguments["ProcConfigCacheID"]

    # Splitting arguments
    timePerEvent = int(arguments.get("TimePerEvent", 60))
    filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
    totalTime = int(arguments.get("TotalTime", 9 * 3600))
    self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)

    # pileup configuration for the first generation task
    self.pileupConfig = arguments.get("PileupConfig", None)

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    eventsPerJob = int(totalTime / timePerEvent / filterEfficiency)
    self.prodJobSplitAlgo = arguments.get("ProdJobSplitAlgo", "EventBased")
    self.prodJobSplitArgs = arguments.get("ProdJobSplitArgs", {"events_per_job": eventsPerJob})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a RelValMC workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # Required parameters relevant to the MC generation.
    self.genConfigCacheID = arguments["GenConfigCacheID"]
    self.inputPrimaryDataset = arguments["PrimaryDataset"]
    self.totalEvents = arguments["RequestNumEvents"]
    self.seeding = arguments.get("Seeding", "AutomaticSeeding")
    self.pileupConfig = arguments.get("PileupConfig", None)

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Generation step parameters
    self.genJobSplitAlgo = arguments.get("GenJobSplitAlgo", "EventBased")
    self.genJobSplitArgs = arguments.get("GenJobSplitArgs", {"events_per_job": 1000})

    # Processing step parameters
    self.procJobSplitAlgo = arguments.get("ProcJobSplitAlgo", "FileBased")
    self.procJobSplitArgs = arguments.get("ProcJobSplitArgs", {"files_per_job": 1})

    self.genOutputModuleName = arguments.get("GenOutputModuleName", None)
    self.stepOneOutputModuleName = arguments.get("StepOneOutputModuleName", None)
    self.stepOneConfigCacheID = arguments["StepOneConfigCacheID"]
    self.stepTwoConfigCacheID = arguments["StepTwoConfigCacheID"]

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    if self.eventsPerJob is None:
        self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
    if self.eventsPerLumi is None:
        self.eventsPerLumi = self.eventsPerJob
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        lumisPerJob = int(float(self.eventsPerJob) / self.eventsPerLumi)
        # floor division keeps the Python 2 integer semantics of the original
        self.previousJobCount = self.firstLumi // lumisPerJob
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
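# Worked example for the extension logic above (values assumed for
# illustration): events_per_job = 480 and events_per_lumi = 120 give
# lumisPerJob = 4; extending from firstLumi = 101 yields
# previousJobCount = 101 // 4 = 25, so LFN numbering resumes after the 25
# jobs already produced.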
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)
    self.configCacheID = arguments.get("ConfigCacheID")

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 1})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create an Express workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.outputs = arguments['Outputs']

    # job splitting parameters (also required parameters)
    self.expressSplitArgs = {}
    self.expressSplitArgs['maxInputRate'] = arguments['MaxInputRate']
    self.expressSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.expressMergeSplitArgs = {}
    self.expressMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
    self.expressMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.expressMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

    # fixed parameters that are used in various places
    self.alcaHarvestOutLabel = "Sqlite"

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for an Analysis request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Parameters for users
    self.owner_vogroup = arguments.get("VoGroup", '')
    self.owner_vorole = arguments.get("VoRole", '')
    self.userSandbox = arguments.get("userSandbox", None)
    self.userFiles = arguments.get("userFiles", [])
    self.outputFiles = arguments.get("OutputFiles", [])
    self.userName = arguments.get("Username", 'jblow')
    self.saveLogs = arguments.get("SaveLogs", True)
    self.emulation = arguments.get("Emulation", False)

    # Workflow creation
    self.couchURL = arguments.get("CouchURL")
    self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
    self.configCacheID = arguments.get("AnalysisConfigCacheDoc", None)
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    self.minMergeSize = 1
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.acquisitionEra = arguments.get("PublishDataName", str(int(time.time())))
    self.globalTag = arguments.get("GlobalTag", None)
    self.inputDataset = arguments.get('InputDataset', None)
    self.processingVersion = arguments.get('ProcessingVersion', 1)
    self.origRequest = arguments.get('OriginalRequestName', '')

    # Sites
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")

    # ACDC and job splitting
    self.ACDCURL = arguments.get("ACDCUrl", "")
    self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
    self.Runs = arguments.get("Runs", None)
    self.Lumis = arguments.get("Lumis", None)
    self.Submission = arguments.get("Submission", 1)
    self.analysisJobSplitAlgo = arguments.get("JobSplitAlgo", "EventBased")

    if self.Lumis and self.analysisJobSplitAlgo not in ['LumiBased']:
        raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' % 'LumiBased')

    if self.analysisJobSplitAlgo == 'EventBased':
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'events_per_job': 1000})
    elif self.analysisJobSplitAlgo == 'LumiBased':
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'lumis_per_job': 15})
        if self.Lumis:
            self.analysisJobSplitArgs.update({'lumis': self.Lumis})
            self.analysisJobSplitArgs.update({'runs': self.Runs})
        self.analysisJobSplitArgs.update({'halt_job_on_file_boundaries': False,
                                          'splitOnRun': False})
    else:
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {})

    return self.buildWorkload()
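# Illustrative outcome of the splitting setup above (request values assumed):
# JobSplitAlgo = "LumiBased" with a Lumis/Runs mask supplied ends up with
#   {'lumis_per_job': 15, 'lumis': <Lumis>, 'runs': <Runs>,
#    'halt_job_on_file_boundaries': False, 'splitOnRun': False}
# while any other algorithm simply takes the raw JobSplitArgs dictionary.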
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a StoreResults workload with the given parameters.
    """
    # first of all, we update the merged LFN based on the physics group
    arguments['MergedLFNBase'] += "/" + arguments['PhysicsGroup'].lower()
    StdBase.__call__(self, workloadName, arguments)

    (inputPrimaryDataset, inputProcessedDataset, inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()

    mergeTask = workload.newTask("StoreResults")
    self.addDashboardMonitoring(mergeTask)
    mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
    mergeTaskCmssw.setStepType("CMSSW")

    mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
    mergeTaskStageOut.setStepType("StageOut")
    mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
    mergeTaskLogArch.setStepType("LogArchive")

    self.addLogCollectTask(mergeTask, taskName="StoreResultsLogCollect")

    mergeTask.setTaskType("Merge")
    mergeTask.applyTemplates()

    mergeTask.addInputDataset(name=self.inputDataset,
                              primary=inputPrimaryDataset,
                              processed=inputProcessedDataset,
                              tier=inputDataTier,
                              dbsurl=self.dbsUrl,
                              block_blacklist=self.blockBlacklist,
                              block_whitelist=self.blockWhitelist,
                              run_blacklist=self.runBlacklist,
                              run_whitelist=self.runWhitelist)

    splitAlgo = "ParentlessMergeBySize"
    mergeTask.setSplittingAlgorithm(splitAlgo,
                                    max_merge_size=self.maxMergeSize,
                                    min_merge_size=self.minMergeSize,
                                    max_merge_events=self.maxMergeEvents)

    mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
    mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment="",
                                    scramArch=self.scramArch)
    mergeTaskCmsswHelper.setGlobalTag(self.globalTag)
    mergeTaskCmsswHelper.setSkipBadFiles(True)
    mergeTaskCmsswHelper.setDataProcessingConfig("do_not_use", "merge")

    self.addOutputModule(mergeTask, "Merged",
                         primaryDataset=inputPrimaryDataset,
                         dataTier=self.dataTier,
                         filterName=None,
                         forceMerged=True)

    workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase)
    workload.setDashboardActivity("StoreResults")

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    workload.setTaskPropertiesFromWorkload()
    self.reportWorkflowToDashboard(workload.getDashboardActivity())

    return workload
def __call__(self, workloadName, arguments):
    StdBase.__call__(self, workloadName, arguments)
    self.originalRequestName = self.initialTaskPath.split('/')[1]
    # TODO: remove the None case when reqmgr is retired
    return self.buildWorkload(arguments.get("OriginalRequestCouchURL", None))
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a StoreResults workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase)
    workload.setDashboardActivity("StoreResults")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())

    mergeTask = workload.newTask("StoreResults")
    self.addDashboardMonitoring(mergeTask)
    mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
    mergeTaskCmssw.setStepType("CMSSW")
    mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
    mergeTaskStageOut.setStepType("StageOut")
    mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
    mergeTaskLogArch.setStepType("LogArchive")
    mergeTask.setSiteWhitelist(self.siteWhitelist)
    mergeTask.setSiteBlacklist(self.siteBlacklist)
    self.addLogCollectTask(mergeTask, taskName = "StoreResultsLogCollect")
    mergeTask.setTaskType("Merge")
    mergeTask.applyTemplates()
    mergeTask.addInputDataset(primary = self.inputPrimaryDataset,
                              processed = self.inputProcessedDataset,
                              tier = self.inputDataTier,
                              dbsurl = self.dbsUrl,
                              block_blacklist = self.blockBlacklist,
                              block_whitelist = self.blockWhitelist,
                              run_blacklist = self.runBlacklist,
                              run_whitelist = self.runWhitelist)

    splitAlgo = "ParentlessMergeBySize"
    mergeTask.setSplittingAlgorithm(splitAlgo,
                                    max_merge_size = self.maxMergeSize,
                                    min_merge_size = self.minMergeSize,
                                    max_merge_events = self.maxMergeEvents)

    mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
    mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment = "",
                                    scramArch = self.scramArch)
    mergeTaskCmsswHelper.setGlobalTag(self.globalTag)
    mergeTaskCmsswHelper.setSkipBadFiles(True)
    mergeTaskCmsswHelper.setDataProcessingConfig("do_not_use", "merge")

    self.addOutputModule(mergeTask, "Merged",
                         primaryDataset = self.inputPrimaryDataset,
                         dataTier = self.dataTier,
                         filterName = None,
                         forceMerged = True)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    workload.setTaskPropertiesFromWorkload()

    return workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()
    self.arguments = arguments
    self.couchURL = arguments['CouchURL']
    self.couchDBName = arguments['CouchDBName']
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments.get("GlobalTag", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl",
                                "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    numTasks = arguments['TaskChain']
    for i in range(1, numTasks + 1):
        # consistency check that there are numTasks defined in the request:
        if "Task%s" % i not in arguments:
            msg = "Specified number of tasks: %s does not match defined task dictionary for Task%s" % (numTasks, i)
            raise RuntimeError(msg)

        taskConf = getTaskN(arguments, i)
        parent = parentTaskName(taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf.get("BlockBlacklist", [])
        self.blockWhitelist = taskConf.get("BlockWhitelist", [])
        self.runBlacklist = taskConf.get("RunBlacklist", [])
        self.runWhitelist = taskConf.get("RunWhitelist", [])

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                                      taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block",
                                                      taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)

        self.taskMapping[task.name()] = taskConf

    return self.workload
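# Sketches of the small helpers used above, reconstructed from how they are
# called; treat the exact bodies as assumptions rather than the real module code.
def getTaskN(arguments, index):
    """Return the configuration dictionary for Task<index>, or None if absent."""
    return arguments.get("Task%s" % index, None)

def parentTaskName(taskConf):
    """Return the name of the input task this task consumes, if any."""
    return taskConf.get("InputTask", None)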
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Detect blow-up factor from first task in chain.
    blowupFactor = 1
    if (self.taskChain > 1) and 'TimePerEvent' in arguments["Task1"]:
        origTpe = arguments["Task1"]['TimePerEvent']
        if origTpe <= 0:
            origTpe = 1.0
        sumTpe = 0
        tpeCount = 0
        for i in xrange(1, self.taskChain + 1):
            if 'TimePerEvent' in arguments["Task%d" % i]:
                sumTpe += arguments["Task%d" % i]['TimePerEvent']
                tpeCount += 1
        if tpeCount > 0:
            blowupFactor = sumTpe / origTpe

    for i in xrange(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v
        parent = taskConf.get("InputTask", None)

        self.modifyTaskConfiguration(taskConf, i == 1,
                                     i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)

        self.taskMapping[task.name()] = taskConf

    self.workload.ignoreOutputModules(self.ignoredOutputModules)

    return self.workload
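# Worked example of the blow-up computation above, as a standalone sketch
# (blowupFactorFor is a hypothetical helper, not part of the spec class): with
# TimePerEvent of 10, 20 and 30 seconds for Task1..Task3, the factor is
# (10 + 20 + 30) / 10 = 6.0, i.e. each initial event costs roughly six times
# the first task's per-event time once the whole chain has run. Note the
# float() below, which sidesteps Python 2 integer truncation when all
# TimePerEvent values are ints.
def blowupFactorFor(arguments, numTasks):
    factor = 1
    if numTasks > 1 and 'TimePerEvent' in arguments["Task1"]:
        origTpe = arguments["Task1"]['TimePerEvent']
        if origTpe <= 0:
            origTpe = 1.0
        tpes = [arguments["Task%d" % i]['TimePerEvent']
                for i in range(1, numTasks + 1)
                if 'TimePerEvent' in arguments["Task%d" % i]]
        if tpes:
            factor = sum(tpes) / float(origTpe)
    return factor

# blowupFactorFor({'Task1': {'TimePerEvent': 10},
#                  'Task2': {'TimePerEvent': 20},
#                  'Task3': {'TimePerEvent': 30}}, 3)  ->  6.0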
def __call__(self, workloadName, arguments):
    StdBase.__call__(self, workloadName, arguments)
    self.originalRequestName = self.initialTaskPath.split('/')[1]
    return self.buildWorkload(arguments)
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for an Analysis request
    """
    StdBase.__call__(self, workloadName, arguments)
    self.globalTag = arguments.get("GlobalTag", None)

    # Required parameters.
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.inputDataset = arguments["InputDataset"]
    self.processingVersion = arguments.get("ProcessingVersion", "v1")
    self.origRequest = arguments.get("OriginalRequestName", "")
    self.emulation = arguments.get("Emulation", False)

    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])

    self.couchURL = arguments.get("CouchUrl")
    self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
    self.analysisConfigCacheID = arguments.get("AnalysisConfigCacheDoc", None)

    self.ACDCURL = arguments.get("ACDCUrl", "")
    self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
    self.ACDCID = arguments.get("ACDCDoc", None)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.analysisJobSplitAlgo = arguments.get("JobSplitAlgo", "EventBased")
    if self.ACDCID and self.analysisJobSplitAlgo not in ["LumiBased"]:
        raise RuntimeError("Running on selected lumis only supported in split mode(s) %s" % "LumiBased")

    if self.analysisJobSplitAlgo == "EventBased":
        self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {"events_per_job": 1000})
    elif self.analysisJobSplitAlgo == "LumiBased":
        self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {"lumis_per_job": 15})
        if self.ACDCID:
            self.analysisJobSplitArgs.update({"filesetName": self.ACDCID,
                                              "collectionName": self.origRequest,
                                              "couchURL": self.ACDCURL,
                                              "couchDB": self.ACDCDBName,
                                              "owner": self.owner,
                                              "group": self.group,
                                             })
            self.analysisJobSplitArgs.update({"halt_job_on_file_boundaries": False,
                                              "splitOnRun": False})
    else:
        self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {})

    self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")
    self.minMergeSize = 1  # arguments.get("MinMergeSize", 1)
    self.acquisitionEra = arguments.get("PublishDataName", str(int(time.time())))

    self.owner_vogroup = arguments.get("VoGroup", "")
    self.owner_vorole = arguments.get("VoRole", "")
    self.userSandbox = arguments.get("userSandbox", None)
    self.userFiles = arguments.get("userFiles", [])
    self.userName = arguments.get("Username", "jblow")
    self.saveLogs = arguments.get("SaveLogs", True)
    self.outputFiles = arguments.get("OutputFiles", [])

    return self.buildWorkload()
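# Shape of the splitting arguments produced by the ACDC branch above: the
# splitter is pointed at a fileset/collection in the ACDC CouchDB so that only
# the lumis recorded for the original request are resubmitted. All values
# below are hypothetical placeholders.
acdcSplitArgs = {"lumis_per_job": 15,
                 "filesetName": "acdc-doc-id",             # self.ACDCID
                 "collectionName": "OriginalRequestName",  # self.origRequest
                 "couchURL": "http://acdcserver.example",  # self.ACDCURL
                 "couchDB": "wmagent_acdc",                # self.ACDCDBName
                 "owner": "jblow",                         # self.owner
                 "group": "Analysis",                      # self.group
                 "halt_job_on_file_boundaries": False,
                 "splitOnRun": False}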
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a StoreResults workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.cmsPath = arguments["CmsPath"]

    # Optional arguments.
    self.dbsUrl = arguments.get("DbsUrl",
                                "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlackList = arguments.get("BlockBlackList", [])
    self.blockWhiteList = arguments.get("BlockWhiteList", [])
    self.runBlackList = arguments.get("RunBlackList", [])
    self.runWhiteList = arguments.get("RunWhiteList", [])
    self.emulation = arguments.get("Emulation", False)
    self.stdJobSplitAlgo = arguments.get("StdJobSplitAlgo", 'FileBased')
    self.stdJobSplitArgs = arguments.get("StdJobSplitArgs", {'files_per_job': 1})
    self.dataTier = arguments.get("DataTier", 'USER')
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    dataTier = self.dataTier
    (self.inputPrimaryDataset, self.inputProcessedDataset, self.inputDataTier) = \
        self.inputDataset[1:].split("/")
    processedDatasetName = "%s-%s" % (self.acquisitionEra, self.processingVersion)

    workload = self.createWorkload()
    workload.setDashboardActivity("StoreResults")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())

    mergeTask = workload.newTask("StoreResults")
    self.addDashboardMonitoring(mergeTask)
    mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
    mergeTaskCmssw.setStepType("CMSSW")
    mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
    mergeTaskStageOut.setStepType("StageOut")
    mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
    mergeTaskLogArch.setStepType("LogArchive")
    self.addLogCollectTask(mergeTask, taskName="StoreResultsLogCollect")
    mergeTask.setTaskType("Merge")
    mergeTask.applyTemplates()
    mergeTask.addInputDataset(primary=self.inputPrimaryDataset,
                              processed=self.inputProcessedDataset,
                              tier=self.inputDataTier,
                              dbsurl=self.dbsUrl,
                              block_blacklist=self.blockBlackList,
                              block_whitelist=self.blockWhiteList,
                              run_blacklist=self.runBlackList,
                              run_whitelist=self.runWhiteList)

    splitAlgo = "ParentlessMergeBySize"
    mergeTask.setSplittingAlgorithm(splitAlgo,
                                    max_merge_size=self.maxMergeSize,
                                    min_merge_size=self.minMergeSize,
                                    max_merge_events=self.maxMergeEvents,
                                    siteWhitelist=self.siteWhitelist,
                                    siteBlacklist=self.siteBlacklist)

    mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
    mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment="",
                                    scramArch=self.scramArch)
    mergeTaskCmsswHelper.setDataProcessingConfig("cosmics", "merge")

    mergedLFN = "%s/%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra,
                                    self.inputPrimaryDataset, dataTier,
                                    self.processingVersion)
    mergeTaskCmsswHelper.addOutputModule("Merged",
                                         primaryDataset=self.inputPrimaryDataset,
                                         processedDataset=processedDatasetName,
                                         dataTier=dataTier,
                                         lfnBase=mergedLFN)

    return workload
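# The merged LFN built above follows the layout
# <MergedLFNBase>/<AcquisitionEra>/<PrimaryDataset>/<DataTier>/<ProcessingVersion>.
# A quick check with hypothetical values:
mergedLFN = "%s/%s/%s/%s/%s" % ("/store/results/analysisops", "StoreResults-2011",
                                "Cosmics", "USER", "v1")
assert mergedLFN == "/store/results/analysisops/StoreResults-2011/Cosmics/USER/v1"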
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for an Analysis request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Parameters for users
    self.owner_vogroup = arguments.get("VoGroup", '')
    self.owner_vorole = arguments.get("VoRole", '')
    self.userSandbox = arguments.get("userSandbox", None)
    self.userFiles = arguments.get("userFiles", [])
    self.outputFiles = arguments.get("OutputFiles", [])
    self.userName = arguments.get("Username", 'jblow')
    self.saveLogs = arguments.get("SaveLogs", True)
    self.emulation = arguments.get("Emulation", False)

    # Workflow creation
    self.couchURL = arguments.get("CouchURL")
    self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
    self.minMergeSize = 1
    self.configCacheID = arguments.get("AnalysisConfigCacheDoc", None)
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.acquisitionEra = arguments.get("PublishDataName", str(int(time.time())))
    self.globalTag = arguments.get("GlobalTag", None)
    self.inputDataset = arguments.get('InputDataset', None)
    self.processingVersion = arguments.get('ProcessingVersion', '1')
    self.origRequest = arguments.get('OriginalRequestName', '')

    # Sites
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")

    # ACDC and job splitting
    self.ACDCURL = arguments.get("ACDCUrl", "")
    self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
    self.ACDCID = arguments.get("ACDCDoc", None)
    self.analysisJobSplitAlgo = arguments.get("JobSplitAlgo", "EventBased")

    if self.ACDCID and self.analysisJobSplitAlgo not in ['LumiBased']:
        raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' % 'LumiBased')

    if self.analysisJobSplitAlgo == 'EventBased':
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'events_per_job': 1000})
    elif self.analysisJobSplitAlgo == 'LumiBased':
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'lumis_per_job': 15})
        if self.ACDCID:
            self.analysisJobSplitArgs.update({'filesetName': self.ACDCID,
                                              'collectionName': self.origRequest,
                                              'couchURL': self.ACDCURL,
                                              'couchDB': self.ACDCDBName,
                                              'owner': self.owner,
                                              'group': self.group,
                                             })
            self.analysisJobSplitArgs.update({'halt_job_on_file_boundaries': False,
                                              'splitOnRun': False})
    else:
        self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {})

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Detect blow-up factor from first task in chain.
    blowupFactor = 1
    if (self.taskChain > 1) and 'TimePerEvent' in arguments["Task1"]:
        origTpe = arguments["Task1"]['TimePerEvent']
        if origTpe <= 0:
            origTpe = 1.0
        sumTpe = 0
        tpeCount = 0
        for i in xrange(1, self.taskChain + 1):
            if 'TimePerEvent' in arguments["Task%d" % i]:
                sumTpe += arguments["Task%d" % i]['TimePerEvent']
                tpeCount += 1
        if tpeCount > 0:
            blowupFactor = sumTpe / origTpe

    for i in xrange(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v
        parent = taskConf.get("InputTask", None)

        self.modifyTaskConfiguration(taskConf, i == 1,
                                     i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.setupTask(task, taskConf)
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)

        self.taskMapping[task.name()] = taskConf

    # now that all tasks have been created, create the parent x output dataset map
    self.createTaskParentageMapping(arguments)
    self.workload.setTaskParentageMapping(self.taskOutputMapping)
    self.workload.ignoreOutputModules(self.ignoredOutputModules)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # and push the parentage map to the reqmgr2 workload cache doc
    arguments['ChainParentageMap'] = self.workload.getChainParentageSimpleMapping()

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Task1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Task1']['EventsPerLumi'] = self.eventsPerLumi

    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a StoreResults workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.cmsPath = arguments["CmsPath"]

    # Required parameters that can be empty.
    self.scenario = arguments["Scenario"]

    # Optional arguments.
    self.dbsUrl = arguments.get("DbsUrl",
                                "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlackList = arguments.get("BlockBlackList", [])
    self.blockWhiteList = arguments.get("BlockWhiteList", [])
    self.runBlackList = arguments.get("RunBlackList", [])
    self.runWhiteList = arguments.get("RunWhiteList", [])
    self.emulation = arguments.get("Emulation", False)
    self.stdJobSplitAlgo = arguments.get("StdJobSplitAlgo", 'FileBased')
    self.stdJobSplitArgs = arguments.get("StdJobSplitArgs", {'files_per_job': 1})
    self.dataTier = arguments.get("DataTier", 'USER')

    dataTier = self.dataTier
    (self.inputPrimaryDataset, self.inputProcessedDataset, self.inputDataTier) = \
        self.inputDataset[1:].split("/")
    processedDatasetName = "%s-%s" % (self.acquisitionEra, self.processingVersion)

    workload = self.createWorkload()
    mergeTask = workload.newTask("StoreResults")
    self.addDashboardMonitoring(mergeTask)
    mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
    mergeTaskCmssw.setStepType("CMSSW")
    mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
    mergeTaskStageOut.setStepType("StageOut")
    mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
    mergeTaskLogArch.setStepType("LogArchive")
    self.addLogCollectTask(mergeTask, taskName = "StoreResultsLogCollect")
    mergeTask.addGenerator("BasicNaming")
    mergeTask.addGenerator("BasicCounter")
    mergeTask.setTaskType("Merge")
    mergeTask.applyTemplates()
    mergeTask.addInputDataset(primary = self.inputPrimaryDataset,
                              processed = self.inputProcessedDataset,
                              tier = self.inputDataTier,
                              dbsurl = self.dbsUrl,
                              block_blacklist = self.blockBlackList,
                              block_whitelist = self.blockWhiteList,
                              run_blacklist = self.runBlackList,
                              run_whitelist = self.runWhiteList)

    splitAlgo = "ParentlessMergeBySize"
    mergeTask.setSplittingAlgorithm(splitAlgo,
                                    max_merge_size = self.maxMergeSize,
                                    min_merge_size = self.minMergeSize,
                                    max_merge_events = self.maxMergeEvents,
                                    siteWhitelist = self.siteWhitelist,
                                    siteBlacklist = self.siteBlacklist)

    mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
    mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment = "",
                                    scramArch = self.scramArch)
    mergeTaskCmsswHelper.setDataProcessingConfig("cosmics", "merge")

    mergedLFN = "%s/%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra,
                                    self.inputPrimaryDataset, dataTier,
                                    self.processingVersion)
    mergeTaskCmsswHelper.addOutputModule("Merged",
                                         primaryDataset = self.inputPrimaryDataset,
                                         processedDataset = processedDatasetName,
                                         dataTier = dataTier,
                                         lfnBase = mergedLFN)

    return workload