def getWorkloadCreateArgs():
    """
    Some default values set for testing purposes

    Returns the creation-time argument specification for a MonteCarlo
    workload: the StdBase creation arguments updated with the
    MonteCarlo-specific entries below, then frozen via
    setDefaultArgumentsProperty.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "MonteCarlo", "optional": False},
                "PrimaryDataset": {"optional": False, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": False},
                # Only these two seeding modes are supported
                "Seeding": {"default": "AutomaticSeeding", "null": False,
                            "validate": lambda x: x in ["ReproducibleSeeding", "AutomaticSeeding"]},
                "FilterEfficiency": {"default": 1.0, "type": float, "null": False,
                                     "validate": lambda x: x > 0.0},
                "RequestNumEvents": {"type": int, "null": False,
                                     "optional": False, "validate": lambda x: x > 0},
                "FirstEvent": {"default": 1, "type": int,
                               "validate": lambda x: x > 0, "null": False},
                "FirstLumi": {"default": 1, "type": int,
                              "validate": lambda x: x > 0, "null": False},
                # Pileup inputs are optional and may be null
                "MCPileup": {"validate": dataset, "attr": "mcPileup", "null": True},
                "DataPileup": {"validate": dataset, "attr": "dataPileup", "null": True},
                # Generation jobs only support EventBased splitting
                "SplittingAlgo": {"default": "EventBased", "null": False,
                                  "validate": lambda x: x in ["EventBased"],
                                  "attr": "prodJobSplitAlgo"},
                "DeterministicPileup": {"default": False, "type": strToBool, "null": False},
                "EventsPerJob": {"type": int, "validate": lambda x: x > 0, "null": True},
                "EventsPerLumi": {"type": int, "validate": lambda x: x > 0, "null": True},
                "LheInputFiles": {"default": False, "type": strToBool, "null": False}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    Returns the creation-time argument specification for a
    DataProcessing-style workload: StdBase creation arguments updated
    with the input-dataset, run/block filtering and splitting entries
    below.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {"InputDataset": {"optional": False, "validate": dataset, "null": False},
                "Scenario": {"optional": True, "null": True, "attr": "procScenario"},
                "PrimaryDataset": {"optional": True, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": True},
                # Run numbers must be positive integers
                "RunBlacklist": {"default": [], "type": makeList, "null": False,
                                 "validate": lambda x: all([int(y) > 0 for y in x])},
                "RunWhitelist": {"default": [], "type": makeList, "null": False,
                                 "validate": lambda x: all([int(y) > 0 for y in x])},
                # Block names validated with the shared block() validator
                "BlockBlacklist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "BlockWhitelist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "SplittingAlgo": {"default": "EventAwareLumiBased", "null": False,
                                  "validate": lambda x: x in ["EventBased", "LumiBased",
                                                              "EventAwareLumiBased", "FileBased"],
                                  "attr": "procJobSplitAlgo"},
                "EventsPerJob": {"type": int, "validate": lambda x: x > 0, "null": True},
                "LumisPerJob": {"default": 8, "type": int, "null": False,
                                "validate": lambda x: x > 0},
                "FilesPerJob": {"default": 1, "type": int, "null": False,
                                "validate": lambda x: x > 0}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    Returns the creation-time argument specification for an Express
    (Tier-0) workload.  Most entries are mandatory because the Tier-0
    system, not a human requestor, supplies them.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "Express"},
                "ConfigCacheID": {"optional": True, "null": True},
                "Scenario": {"optional": False, "attr": "procScenario"},
                # Reco release/arch validated against the known lists
                "RecoCMSSWVersion": {"validate": lambda x: x in releases(),
                                     "optional": False, "attr": "recoFrameworkVersion"},
                "RecoScramArch": {"validate": lambda x: all([y in architectures() for y in x]),
                                  "optional": False, "type": makeNonEmptyList},
                "GlobalTag": {"optional": False},
                "GlobalTagTransaction": {"optional": False},
                "ProcessingString": {"default": "", "validate": procstringT0},
                "StreamName": {"optional": False},
                "SpecialDataset": {"optional": False},
                # Alca harvesting configuration
                "AlcaHarvestTimeout": {"type": int, "optional": False},
                "AlcaHarvestDir": {"optional": False, "null": True},
                "AlcaSkims": {"type": makeList, "optional": False},
                "DQMSequences": {"type": makeList, "attr": "dqmSequences", "optional": False},
                "Outputs": {"type": makeList, "optional": False},
                # Splitting limits for the express processing/merge jobs
                "MaxInputRate": {"type": int, "optional": False},
                "MaxInputEvents": {"type": int, "optional": False},
                "MaxInputSize": {"type": int, "optional": False},
                "MaxInputFiles": {"type": int, "optional": False},
                "MaxLatency": {"type": int, "optional": False},
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Parameters the requestor is obliged to provide.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # ConfigCache location, handed over by the ReqMgr (or whatever
    # component is driving workflow creation).
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # Optional parameters with sensible fall-backs.
    self.dbsUrl = arguments.get("DbsUrl",
                                "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    for attrName, argKey in (("blockBlacklist", "BlockBlacklist"),
                             ("blockWhitelist", "BlockWhitelist"),
                             ("runBlacklist", "RunBlacklist"),
                             ("runWhitelist", "RunWhitelist")):
        setattr(self, attrName, arguments.get(argKey, []))
    self.emulation = arguments.get("Emulation", False)
    self.procConfigCacheID = arguments.get("ProcConfigCacheID")

    # Placeholder splitting settings; they are replaced once the
    # workflow has actually been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"lumis_per_job": 1})

    return self.buildWorkload()
def __init__(self):
    """Initialize StdBase state and declare the spec-specific attribute."""
    StdBase.__init__(self)
    # Filled in later from the request arguments.
    self.openRunningTimeout = None
def __init__(self):
    """Set up StdBase state plus the per-task bookkeeping containers."""
    StdBase.__init__(self)
    # Splitting knobs, populated from the request arguments later.
    self.eventsPerLumi = None
    self.eventsPerJob = None
    # Bookkeeping maps keyed by task name.
    self.taskOutputMapping = {}
    self.taskMapping = {}
    self.mergeMapping = {}
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.

    Checks that the request defines as many TaskN dictionaries as the
    TaskChain argument claims, then builds every task in order, wiring
    each one to its parent's merge task where applicable.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()
    self.arguments = arguments
    self.couchURL = arguments['CouchURL']
    self.couchDBName = arguments['CouchDBName']
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments.get("GlobalTag", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    numTasks = arguments['TaskChain']
    for i in range(1, numTasks + 1):
        # Consistency check that there are numTasks defined in the request.
        # FIX: dict.has_key() and the "raise E, msg" statement are
        # Python-2-only; use the portable forms.
        if "Task%s" % i not in arguments:
            msg = "Specified number of tasks: %s does not match defined task dictionary for Task%s" % (i, i)
            raise RuntimeError(msg)

        taskConf = getTaskN(arguments, i)
        parent = parentTaskName(taskConf)

        # Set task-specific global parameters.
        self.blockBlacklist = taskConf.get("BlockBlacklist", [])
        self.blockWhitelist = taskConf.get("BlockWhitelist", [])
        self.runBlacklist = taskConf.get("RunBlacklist", [])
        self.runWhitelist = taskConf.get("RunWhitelist", [])

        # Attach to the parent's merge task when this task has one.
        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing.
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgorithm'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # All subsequent tasks have to be processing tasks.
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    return self.workload
def getWorkloadArguments():
    """
    Returns the argument specification for a TaskChain workload:
    StdBase workload arguments, plus the ReqMgr-specific arguments,
    plus the TaskChain entries below.
    """
    baseArgs = StdBase.getWorkloadArguments()
    # ReqMgr-managed arguments are layered on top of the base set.
    reqMgrArgs = StdBase.getWorkloadArgumentsWithReqMgr()
    baseArgs.update(reqMgrArgs)
    specArgs = {"RequestType": {"default": "TaskChain", "optional": False, "attr": "requestType"},
                "GlobalTag": {"default": "GT_TC_V1", "type": str, "optional": False,
                              "validate": None, "attr": "globalTag", "null": False},
                "CouchURL": {"default": "http://localhost:5984", "type": str, "optional": False,
                             "validate": couchurl, "attr": "couchURL", "null": False},
                "CouchDBName": {"default": "dp_configcache", "type": str, "optional": False,
                                "validate": identifier, "attr": "couchDBName", "null": False},
                "ConfigCacheUrl": {"default": None, "type": str, "optional": True,
                                   "validate": None, "attr": "configCacheUrl", "null": True},
                "IgnoredOutputModules": {"default": [], "type": makeList, "optional": True,
                                         "validate": None, "attr": "ignoredOutputModules", "null": False},
                # Number of TaskN dictionaries expected in the request
                "TaskChain": {"default": 1, "type": int, "optional": False,
                              "validate": lambda x: x > 0, "attr": "taskChain", "null": False},
                "FirstEvent": {"default": 1, "type": int, "optional": True,
                               "validate": lambda x: x > 0, "attr": "firstEvent", "null": False},
                "FirstLumi": {"default": 1, "type": int, "optional": True,
                              "validate": lambda x: x > 0, "attr": "firstLumi", "null": False}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadArguments():
    """
    If arg is not specifyed, automatically set by following default value
    - default: None
    - type: str
    - optional: True
    - assign_optional: True
    - validate: None
    - attr: change first letter to lower case
    - null: False

    Returns the argument specification for an Express (Tier-0) workload.
    """
    baseArgs = StdBase.getWorkloadArguments()
    specArgs = {"RequestType" : {"default" : "Express"},
                "Scenario" : {"optional" : False, "attr" : "procScenario"},
                "RecoCMSSWVersion" : {"optional" : False, "validate" : cmsswversion,
                                      "attr" : "recoFrameworkVersion", "null" : True},
                "RecoScramArch" : {"optional" : False, "null" : True},
                "GlobalTag" : {"optional" : False},
                "GlobalTagTransaction" : {"optional" : False},
                "StreamName" : {"optional" : False},
                "SpecialDataset" : {"optional" : False},
                # Alca harvesting configuration
                "AlcaHarvestTimeout" : {"type" : int, "optional" : False},
                "AlcaHarvestDir" : {"optional" : False, "null" : True},
                "AlcaSkims" : {"type" : makeList, "optional" : False},
                "DqmSequences" : {"type" : makeList, "optional" : False},
                # Delay (seconds, must be positive) before blocks are closed
                "BlockCloseDelay" : {"type" : int, "optional" : False,
                                     "validate" : lambda x : x > 0,
                                     },
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    self.workload = self.createWorkload()
    self.workload.setDashboardActivity("harvesting")
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # One job per run by default; a multiRun harvest unit collapses
    # everything into a single job via an absurdly high runs_per_job.
    if self.dqmHarvestUnit == "multiRun":
        splitArgs = {"runs_per_job": 999999}
    else:
        splitArgs = {"runs_per_job": 1}
    self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

    # This also creates the logCollect task.
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # Push workload-level acquisitionEra / processingVersion /
    # processingString down to every task.
    self.workload.setTaskPropertiesFromWorkload()

    return self.workload
def getWorkloadArguments():
    """
    Returns the argument specification for a Repack workload.

    Any attribute not given here falls back to the spec-wide defaults
    (default None, type str, optional, no validation, attribute name is
    the argument with its first letter lower-cased, not nullable).
    """
    baseArgs = StdBase.getWorkloadArguments()

    specArgs = {}
    specArgs["RequestType"] = {"default": "Repack"}
    specArgs["Scenario"] = {"default": "fake", "attr": "procScenario"}
    specArgs["GlobalTag"] = {"default": "fake"}
    # Delay before blocks are closed; must be a positive integer.
    specArgs["BlockCloseDelay"] = {"type": int, "optional": False,
                                   "validate": lambda x: x > 0}

    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a Repack workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameter supplied by the requestor.
    self.outputs = arguments['Outputs']

    # Splitting settings for the repack task itself.
    self.repackSplitArgs = {'maxSizeSingleLumi': arguments['MaxSizeSingleLumi'],
                            'maxSizeMultiLumi': arguments['MaxSizeMultiLumi'],
                            'maxInputEvents': arguments['MaxInputEvents'],
                            'maxInputFiles': arguments['MaxInputFiles'],
                            'maxLatency': arguments['MaxLatency']}

    # Splitting settings for the repack merge task.
    self.repackMergeSplitArgs = {'minInputSize': arguments['MinInputSize'],
                                 'maxInputSize': arguments['MaxInputSize'],
                                 'maxEdmSize': arguments['MaxEdmSize'],
                                 'maxOverSize': arguments['MaxOverSize'],
                                 'maxInputEvents': arguments['MaxInputEvents'],
                                 'maxInputFiles': arguments['MaxInputFiles'],
                                 'maxLatency': arguments['MaxLatency']}

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for an Analysis request
    """
    StdBase.__call__(self, workloadName, arguments)
    self.minMergeSize = 1

    # A lumi selection only makes sense with lumi-based splitting.
    if self.Lumis and self.analysisJobSplitAlgo not in ['LumiBased']:
        raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' % 'LumiBased')

    if self.analysisJobSplitAlgo == 'EventBased':
        self.analysisJobSplitArgs = {'events_per_job': self.eventsPerJob}
    elif self.analysisJobSplitAlgo == 'LumiBased':
        splitArgs = {'lumis_per_job': self.lumisPerJob}
        if self.Lumis:
            splitArgs['lumis'] = self.Lumis
            splitArgs['runs'] = self.Runs
        splitArgs['halt_job_on_file_boundaries'] = False
        splitArgs['splitOnRun'] = False
        self.analysisJobSplitArgs = splitArgs

    return self.buildWorkload()
def __init__(self):
    """Initialize StdBase state and the input-dataset attributes."""
    StdBase.__init__(self)
    # Both are filled in from the request arguments later on.
    self.inputProcessedDataset = None
    self.inputPrimaryDataset = None
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    self.workload.setDashboardActivity("harvesting")
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Effectively a single job over the whole dataset.
    splitArgs = {"files_per_job": 99999}
    self.workload.setWorkQueueSplitPolicy("Dataset", "FileBased", splitArgs)

    # This also creates the logCollect task.
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # Push workload-level acquisitionEra / processingVersion /
    # processingString down to every task.
    self.workload.setTaskPropertiesFromWorkload()

    return self.workload
def getWorkloadArguments():
    """
    Returns the argument specification for a DQMHarvest workload:
    StdBase workload arguments plus the ReqMgr arguments plus the
    harvest-specific entries below.
    """
    baseArgs = StdBase.getWorkloadArguments()
    reqMgrArgs = StdBase.getWorkloadArgumentsWithReqMgr()
    baseArgs.update(reqMgrArgs)
    specArgs = {"RequestType": {"default": "DQMHarvest"},
                "InputDataset": {"default": None, "optional": False, "validate": dataset},
                "ConfigCacheID": {"optional": True, "validate": None, "null": True},
                "UnmergedLFNBase": {"default": "/store/unmerged"},
                "MergedLFNBase": {"default": "/store/data"},
                # Merge thresholds (bytes / events); must be positive
                "MinMergeSize": {"default": 2 * 1024 * 1024 * 1024, "type": int,
                                 "validate": lambda x: x > 0},
                "MaxMergeSize": {"default": 4 * 1024 * 1024 * 1024, "type": int,
                                 "validate": lambda x: x > 0},
                "MaxMergeEvents": {"default": 100000, "type": int,
                                   "validate": lambda x: x > 0},
                # Run/block filters share the common validators
                "BlockBlacklist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "BlockWhitelist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "RunBlacklist": {"default": [], "type": makeList,
                                 "validate": lambda x: all([int(y) > 0 for y in x])},
                "RunWhitelist": {"default": [], "type": makeList,
                                 "validate": lambda x: all([int(y) > 0 for y in x])}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __init__(self):
    """Initialize StdBase state plus the chain bookkeeping defaults."""
    StdBase.__init__(self)
    # Bookkeeping maps keyed by task name.
    self.mergeMapping = {}
    self.taskMapping = {}
    self.arguments = {}
    # Single-core unless the request asks for more.
    self.multicoreNCores = 1
    self.multicore = False
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.

    The splitting parameters computed here are mostly placeholders:
    they get updated after the workflow has been created.
    """
    StdBase.__call__(self, workloadName, arguments)

    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        # Target roughly 8-hour jobs when no explicit value was given.
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 100000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob

    # BUGFIX: previously the computed skim splitting parameters were
    # unconditionally overwritten with
    # arguments.get("SkimJobSplitArgs", {"files_per_job": 1, "include_parents": True}),
    # which made the whole computation above dead code.  Only honour the
    # argument when the requestor actually supplied it.
    if "SkimJobSplitArgs" in arguments:
        self.skimJobSplitArgs = arguments["SkimJobSplitArgs"]

    return self.buildWorkload()
def getWorkloadCreateArgs():
    """
    Returns the creation-time argument specification for a StoreResults
    workload: StdBase creation arguments updated with the StoreResults
    entries below.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "StoreResults", "optional": False},
                "InputDataset": {"optional": False, "validate": dataset, "null": False},
                "ConfigCacheID": {"optional": True, "null": True},
                # StoreResults output goes to the USER tier by default
                "DataTier": {"default": "USER", "type": str,
                             "optional": True, "validate": None,
                             "attr": "dataTier", "null": False},
                "PhysicsGroup": {"default": "", "optional": False,
                                 "null": False, "validate": physicsgroup},
                "MergedLFNBase": {"default": "/store/results", "type": str,
                                  "optional": True, "validate": None,
                                  "attr": "mergedLFNBase", "null": False},
                "BlockBlacklist": {"default": [], "type": makeList,
                                   "optional": True, "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockBlacklist", "null": False},
                "BlockWhitelist": {"default": [], "type": makeList,
                                   "optional": True, "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockWhitelist", "null": False},
                "RunBlacklist": {"default": [], "type": makeList,
                                 "optional": True, "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runBlacklist", "null": False},
                "RunWhitelist": {"default": [], "type": makeList,
                                 "optional": True, "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runWhitelist", "null": False}}
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __init__(self):
    """Initialize StdBase state plus the chain bookkeeping defaults."""
    StdBase.__init__(self)
    # Bookkeeping maps keyed by task name.
    self.mergeMapping = {}
    self.taskMapping = {}
    self.arguments = {}
    # Single-core unless the request asks for more.
    self.multicoreNCores = 1
    self.multicore = False
    # Output modules to drop from the final workload, if any.
    self.ignoredOutputModules = []
def getWorkloadAssignArgs():
    """
    Returns the assignment-time argument specification, extended with
    the chain parentage map used to resolve dataset parentage.
    """
    baseArgs = StdBase.getWorkloadAssignArgs()
    baseArgs.update({
        "ChainParentageMap": {"default": {}, "type": dict},
    })
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __init__(self):
    """Initialize StdBase state and declare the spec-specific attributes."""
    StdBase.__init__(self)
    # Output modules whose data should not be kept permanently.
    self.transientModules = []
    # Filled in later from the request arguments.
    self.openRunningTimeout = None
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a TaskChain workload with the given parameters.

    Builds every TaskN in order: the first task is configured as either
    a generator or a processing task and drives the work queue split
    policy; all later tasks are processing tasks chained to their
    parent's merge task.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()
    for i in range(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v
        parent = taskConf.get("InputTask", None)

        # First task without an InputDataset is treated as a generator.
        self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        # Multicore may arrive as the string 'None'; only honour real values.
        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])

        # Chain this task to its parent's merge task, if it has a parent.
        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    self.workload.ignoreOutputModules(self.ignoredOutputModules)
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments['CMSSWVersion']
    self.globalTag = arguments['GlobalTag']
    self.writeTiers = arguments['WriteTiers']
    self.alcaSkims = arguments['AlcaSkims']
    self.inputDataset = arguments['InputDataset']
    self.promptSkims = arguments['PromptSkims']
    self.couchURL = arguments['CouchURL']
    self.couchDBName = arguments['CouchDBName']
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
    self.initCommand = arguments['InitCommand']

    # Optional parameters
    self.envPath = arguments.get('EnvPath', None)
    self.binPath = arguments.get('BinPath', None)

    # FIX: dict.has_key() was removed in Python 3, use the "in" operator;
    # also compare against None with identity, not equality.
    if 'Multicore' in arguments:
        numCores = arguments.get('Multicore')
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            # "auto" lets the agent pick the core count.
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Do we run log collect ? (Tier0 does not support it yet)
    self.doLogCollect = arguments.get("DoLogCollect", True)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "EventBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs", {"events_per_job": 500})
    self.skimJobSplitAlgo = arguments.get("SkimJobSplitAlgo", "FileBased")
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {"files_per_job": 1,
                                                               "include_parents": True})

    return self.buildWorkload()
def getWorkloadAssignArgs():
    """
    Returns the assignment-time argument specification, adding the
    Repack log-archive location override.
    """
    baseArgs = StdBase.getWorkloadAssignArgs()
    eosPrefix = "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/Repack"
    baseArgs.update({
        "Override": {"default": {"eos-lfn-prefix": eosPrefix}, "type": dict},
    })
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DataProcessing workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputDataset = arguments["InputDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]

    # One of these parameters must be set.
    # FIX: dict.has_key() was removed in Python 3; use the "in" operator.
    if "ProdConfigCacheID" in arguments:
        self.procConfigCacheID = arguments["ProdConfigCacheID"]
    else:
        self.procConfigCacheID = arguments.get("ProcConfigCacheID", None)

    if "Scenario" in arguments:
        self.procScenario = arguments.get("Scenario", None)
    else:
        self.procScenario = arguments.get("ProcScenario", None)

    if "Multicore" in arguments:
        numCores = arguments.get("Multicore")
        # FIX: compare against None with identity, not equality.
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            # "auto" lets the agent pick the core count.
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
    self.procJobSplitArgs = arguments.get("StdJobSplitArgs",
                                          {"lumis_per_job": 8,
                                           "include_parents": self.includeParents})
    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Create a workload instance for a MonteCarlo request
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.inputPrimaryDataset = arguments["PrimaryDataset"]
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.seeding = arguments.get("Seeding", "AutomaticSeeding")
    self.configCacheID = arguments["ConfigCacheID"]

    # Splitting arguments
    timePerEvent = int(arguments.get("TimePerEvent", 60))
    filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
    totalTime = int(arguments.get("TotalTime", 9 * 3600))
    # Scale up by the filter efficiency so the surviving events match the request.
    self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)
    self.firstEvent = int(arguments.get("FirstEvent", 1))
    self.firstLumi = int(arguments.get("FirstLumi", 1))

    # We don't write out every event in MC, adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * filterEfficiency

    # pileup configuration for the first generation task
    self.pileupConfig = arguments.get("PileupConfig", None)

    # Events per lumi configuration (allow others to inherit)
    # FIX: compare against None with identity, not equality.
    self.eventsPerLumi = arguments.get("EventsPerLumi", None)
    if self.eventsPerLumi is not None:
        self.eventsPerLumi = int(self.eventsPerLumi)

    # The CouchURL and name of the ConfigCache database must be passed in
    # by the ReqMgr or whatever is creating this workflow.
    self.couchURL = arguments["CouchURL"]
    self.couchDBName = arguments["CouchDBName"]
    self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.emulation = arguments.get("Emulation", False)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    eventsPerJob = int(totalTime / timePerEvent / filterEfficiency)
    self.prodJobSplitAlgo = arguments.get("ProdJobSplitAlgo", "EventBased")
    self.prodJobSplitArgs = arguments.get("ProdJobSplitArgs",
                                          {"events_per_job": eventsPerJob})

    # When extending statistics the LFN counter must skip the jobs that
    # produced the earlier events.
    self.previousJobCount = 0
    if self.firstEvent > 1 or self.firstLumi > 1:
        self.previousJobCount = int(math.ceil(self.firstEvent / float(self.prodJobSplitArgs["events_per_job"])))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration
    # FIX: dict.iteritems() is Python-2-only; items() works on both.
    taskConf = {}
    for k, v in arguments["Step1"].items():
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

    self.inputPrimaryDataset = self.getStepValue('PrimaryDataset', taskConf, self.primaryDataset)
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    self.workload.setStepMapping(self.stepMapping)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Step1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
    return self.workload
def getWorkloadArguments():
    """
    Returns the argument specification for a Resubmission request.
    Note that, unlike the other specs, this does not start from the
    StdBase argument set.
    """
    specArgs = {}
    specArgs["RequestType"] = {"default": "Resubmission"}
    # A valid task path has at least /<request>/<task>.
    specArgs["InitialTaskPath"] = {"default": "/SomeRequest/Task1", "optional": False,
                                   "validate": lambda x: len(x.split('/')) > 2}
    specArgs["ACDCServer"] = {"default": "https://cmsweb.cern.ch/couchdb",
                              "validate": couchurl, "attr": "acdcServer"}
    specArgs["ACDCDatabase"] = {"default": "acdcserver", "validate": identifier,
                                "attr": "acdcDatabase"}
    specArgs["CollectionName"] = {"default": None, "null": True}
    specArgs["IgnoredOutputModules"] = {"default": [], "type": makeList}
    StdBase.setDefaultArgumentsProperty(specArgs)
    return specArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a Express workload with the given parameters.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Required parameters that must be specified by the Requestor.
    self.frameworkVersion = arguments["CMSSWVersion"]
    self.globalTag = arguments["GlobalTag"]
    self.globalTagTransaction = arguments["GlobalTagTransaction"]
    self.procScenario = arguments['ProcScenario']
    self.alcaSkims = arguments['AlcaSkims']
    self.dqmSequences = arguments['DqmSequences']
    self.outputs = arguments['Outputs']
    self.dqmUploadProxy = arguments['DQMUploadProxy']
    self.alcaHarvestTimeout = arguments['AlcaHarvestTimeout']
    self.alcaHarvestDir = arguments['AlcaHarvestDir']
    self.streamName = arguments['StreamName']

    # job splitting parameters (also required parameters)
    self.expressSplitArgs = {}
    self.expressSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
    self.expressMergeSplitArgs = {}
    self.expressMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
    self.expressMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
    self.expressMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

    # FIX: dict.has_key() was removed in Python 3, use the "in" operator;
    # also compare against None with identity, not equality.
    if "Multicore" in arguments:
        numCores = arguments.get("Multicore")
        if numCores is None or numCores == "":
            self.multicore = False
        elif numCores == "auto":
            # "auto" lets the agent pick the core count.
            self.multicore = True
            self.multicoreNCores = "auto"
        else:
            self.multicore = True
            self.multicoreNCores = numCores

    # Optional arguments that default to something reasonable.
    self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
    self.blockBlacklist = arguments.get("BlockBlacklist", [])
    self.blockWhitelist = arguments.get("BlockWhitelist", [])
    self.runBlacklist = arguments.get("RunBlacklist", [])
    self.runWhitelist = arguments.get("RunWhitelist", [])
    self.emulation = arguments.get("Emulation", False)

    # fixed parameters that are used in various places
    self.alcaHarvestOutLabel = "Sqlite"

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    Store the arguments in attributes with the proper formatting.
    """
    StdBase.__call__(self, workloadName, arguments)

    # Adjust the events by the filter efficiency
    # NOTE(review): truncating int() — under Python 2 integer inputs this is
    # integer division; confirm intended rounding behavior.
    self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

    # We don't write out every event in MC,
    # adjust the size per event accordingly
    self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

    # Tune the splitting, only EventBased is allowed for MonteCarlo
    # 8h jobs are CMS standard, set the default with that in mind
    self.prodJobSplitAlgo = "EventBased"
    if self.eventsPerJob is None:
        # 8 hours of wall time divided by the per-event cost
        self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
    if self.eventsPerLumi is None:
        # default: one lumi section per job
        self.eventsPerLumi = self.eventsPerJob
    self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                             "events_per_lumi": self.eventsPerLumi,
                             "lheInputFiles": self.lheInputFiles}

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

    # Production can be extending statistics,
    # need to move the initial lfn counter
    self.previousJobCount = 0
    if self.firstLumi > 1:
        # number of jobs already produced by the request being extended
        self.previousJobCount = int(math.ceil((self.firstEvent - 1) / self.eventsPerJob))
        self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

    return self.buildWorkload()
def __init__(self):
    """
    __init__

    Pre-declare every attribute that __call__ will later populate, so the
    instance shape is explicit (and pylint stays quiet about attributes
    defined outside __init__).
    """
    StdBase.__init__(self)

    # request-level configuration, filled in from the request arguments
    self.configCacheUrl = None
    self.globalTag = None
    self.frameworkVersion = None
    self.scramArch = None
    self.couchDBName = None
    self.stepChain = None

    # per-event accounting and dataset bookkeeping
    self.sizePerEvent = None
    self.timePerEvent = None
    self.primaryDataset = None
    self.prepID = None
    self.eventsPerJob = None
    self.eventsPerLumi = None

    # stepMapping is going to be used during assignment for properly mapping
    # the arguments to each step/cmsRun
    self.stepMapping = {}
    self.stepParentageMapping = {}
def getChainCreateArgs(firstTask=False, generator=False):
    """
    _getChainCreateArgs_

    Each task dictionary specifies its own set of arguments that need to be
    validated as well, most of them are already defined in
    StdBase.getWorkloadCreateArgs and those do not appear here since they are
    all optional. Here only new arguments are listed.
    """
    baseArgs = StdBase.getChainCreateArgs(firstTask, generator)
    extraArgs = {
        "TaskName": {"optional": False, "null": False},
        # the first task of a chain has no input task to point at
        "InputTask": {"default": None, "optional": firstTask, "null": False},
        "TransientOutputModules": {"default": [], "type": makeList, "optional": True, "null": False},
        "DeterministicPileup": {"default": False, "type": strToBool, "optional": True, "null": False},
        "GlobalTag": {"type": str, "optional": True},
        "TimePerEvent": {"type": float, "optional": True, "validate": lambda x: x > 0},
        "SizePerEvent": {"type": float, "optional": True, "validate": lambda x: x > 0},
        # generator tasks must name the primary dataset they produce
        'PrimaryDataset': {'default': None, 'optional': not generator,
                           'validate': primdataset, 'null': False},
    }
    baseArgs.update(extraArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def testCalcEvtsPerJobLumi(self):
    """
    _testCalcEvtsPerJobLumi_

    Check that EventsPerJob and EventsPerLumi are properly calculated
    for EventBased job splitting.
    """
    # ((eventsPerJob, eventsPerLumi, timePerEvent), expected (job, lumi))
    cases = [
        ((123, 345, 1), (123, 123)),
        ((123, None, 1), (123, 123)),
        ((None, 100, 1), (28800, 100)),
        ((None, 100, 50.5), (600, 100)),
        ((None, 1000, 50.5), (570, 570)),
        ((None, None, 1.25), (23040, 23040)),
        ((None, None, 125.5), (229, 229)),
        ((24000, 11764, 10.157120496967591), (23528, 11764)),
        ((None, 11764, 10.157120496967591), (2835, 2835)),
    ]
    for args, expected in cases:
        self.assertEqual(expected, StdBase.calcEvtsPerJobLumi(*args))
    return
def validate_request_priority(reqArgs):
    """
    Validate the RequestPriority argument against its definition in StdBase

    :param reqArgs: dictionary of user request arguments
    :return: nothing, but raises an exception in case of an invalid value
    """
    # nothing to check when the argument was not provided
    if 'RequestPriority' not in reqArgs:
        return
    prioDefinition = StdBase.getWorkloadCreateArgs()['RequestPriority']
    prioValue = reqArgs['RequestPriority']
    if not isinstance(prioValue, prioDefinition['type']):
        msg = "RequestPriority must be of integer type, not: {}".format(type(prioValue))
        raise InvalidSpecParameterValue(msg)
    # the validate callable returns False for out-of-range values
    if prioDefinition['validate'](prioValue) is False:
        raise InvalidSpecParameterValue("RequestPriority must be an integer between 0 and 999999")
def modifyJobSplitting(self, taskConf, generator):
    """
    _modifyJobSplitting_

    Adapt job splitting according to the first step configuration
    or lack of some of them.
    """
    if generator:
        requestNumEvts = int(taskConf.get("RequestNumEvents", 0))
        # NOTE(review): no default supplied — assumes FilterEfficiency was
        # already defaulted upstream (argument defaults); a missing key would
        # raise TypeError on the division below. Confirm against callers.
        filterEff = taskConf.get("FilterEfficiency")
        # Adjust totalEvents according to the filter efficiency
        taskConf["SplittingAlgo"] = "EventBased"
        taskConf["RequestNumEvents"] = int(requestNumEvts / filterEff)
        taskConf["SizePerEvent"] = self.sizePerEvent * filterEff

    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] in ["EventBased", "EventAwareLumiBased"]:
        # derive events per job/lumi from the requested totals and timing
        taskConf["EventsPerJob"], taskConf["EventsPerLumi"] = StdBase.calcEvtsPerJobLumi(
            taskConf.get("EventsPerJob"),
            taskConf.get("EventsPerLumi"),
            self.timePerEvent,
            taskConf.get("RequestNumEvents"))
        self.eventsPerJob = taskConf["EventsPerJob"]
        self.eventsPerLumi = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventBased":
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        else:
            taskConf["SplittingArguments"]["job_time_limit"] = 48 * 3600  # 2 days
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    # only set these if the caller has not already chosen a value
    taskConf["SplittingArguments"].setdefault("include_parents", taskConf['IncludeParents'])
    taskConf["SplittingArguments"].setdefault("deterministicPileup", self.deterministicPileup)

    return
def getWorkloadArguments():
    """
    _getWorkloadArguments_

    Legacy-style argument definitions: input dataset plus global tag and the
    LFN/merge configuration, with block/run black- and whitelists.
    Defaults are intended for testing.
    """
    baseArgs = StdBase.getWorkloadArguments()
    specArgs = {
        "InputDataset": {"default": "/MinimumBias/Run2010A-Dec22ReReco_v1/USER", "type": str,
                         "optional": False, "validate": dataset, "attr": "inputDataset", "null": False},
        "GlobalTag": {"default": "GT_SR_V1:All", "type": str, "optional": False,
                      "validate": None, "attr": "globalTag", "null": False},
        "CmsPath": {"default": "/tmp", "type": str, "optional": False,
                    "validate": None, "attr": "cmsPath", "null": False},
        "DataTier": {"default": "USER", "type": str, "optional": True,
                     "validate": None, "attr": "dataTier", "null": False},
        "UnmergedLFNBase": {"default": "/store/unmerged", "type": str, "optional": True,
                            "validate": None, "attr": "unmergedLFNBase", "null": False},
        "MergedLFNBase": {"default": "/store/results", "type": str, "optional": True,
                          "validate": None, "attr": "mergedLFNBase", "null": False},
        # merge thresholds: 2 GB minimum, 4 GB maximum
        "MinMergeSize": {"default": 2 * 1024 * 1024 * 1024, "type": int, "optional": True,
                         "validate": lambda x: x > 0, "attr": "minMergeSize", "null": False},
        "MaxMergeSize": {"default": 4 * 1024 * 1024 * 1024, "type": int, "optional": True,
                         "validate": lambda x: x > 0, "attr": "maxMergeSize", "null": False},
        "MaxMergeEvents": {"default": 100000, "type": int, "optional": True,
                           "validate": lambda x: x > 0, "attr": "maxMergeEvents", "null": False},
        "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockBlacklist", "null": False},
        "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockWhitelist", "null": False},
        "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runBlacklist", "null": False},
        "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runWhitelist", "null": False},
    }
    baseArgs.update(specArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    _getWorkloadCreateArgs_

    Argument definitions for creating a StepChain workload: the number of
    steps, the mandatory Step1 dictionary, and a few overrides of the
    StdBase-level parameter definitions.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {
        "RequestType": {"default": "StepChain", "optional": False},
        "Step1": {"default": {}, "optional": False, "type": dict},
        # ConfigCacheID is not used in the main dict for StepChain
        "ConfigCacheID": {"optional": True, "null": True},
        "DeterministicPileup": {"default": False, "type": strToBool, "optional": True, "null": False},
        "PrimaryDataset": {"null": True, "validate": primdataset},
        "StepChain": {"default": 1, "type": int, "null": False,
                      "optional": False, "validate": lambda x: x > 0},
        "ChainParentageMap": {"default": {}, "type": dict},
        "FirstEvent": {"default": 1, "type": int, "validate": lambda x: x > 0, "null": False},
        "FirstLumi": {"default": 1, "type": int, "validate": lambda x: x > 0, "null": False},
        "ParentageResolved": {"default": False, "type": strToBool, "null": False},
        ### Override StdBase parameter definition
        "TimePerEvent": {"default": 12.0, "type": float, "validate": lambda x: x > 0},
        "Memory": {"default": 2300.0, "type": float, "validate": lambda x: x > 0},
        "Multicore": {"default": 1, "type": int, "validate": checkMemCore},
        "EventStreams": {"type": int, "null": True, "default": 0, "validate": checkEventStreams},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getTaskArguments(firstTask=False, generator=False):
    """
    _getTaskArguments_

    Each task dictionary specifies its own set of arguments that need to be
    validated as well, most of them are already defined in
    StdBase.getWorkloadArguments and those do not appear here since they are
    all optional. Here only new arguments are listed.
    """
    specArgs = {
        "TaskName": {"default": None, "type": str, "optional": False,
                     "validate": None, "null": False},
        "ConfigCacheUrl": {"default": "https://cmsweb.cern.ch/couchdb", "type": str,
                           "optional": False, "validate": None,
                           "attr": "configCacheUrl", "null": False},
        "ConfigCacheID": {"default": None, "type": str, "optional": False,
                          "validate": None, "null": False},
        "KeepOutput": {"default": True, "type": strToBool, "optional": True,
                       "validate": None, "null": False},
        "TransientOutputModules": {"default": [], "type": makeList, "optional": True,
                                   "validate": None, "null": False},
        # mandatory for generator tasks, which have no input dataset to inherit from
        "PrimaryDataset": {"default": None, "type": str, "optional": not generator,
                           "validate": primdataset, "null": False},
        "Seeding": {"default": "AutomaticSeeding", "type": str, "optional": True,
                    "validate": lambda x: x in ["ReproducibleSeeding", "AutomaticSeeding"],
                    "null": False},
        "RequestNumEvents": {"default": 1000, "type": int, "optional": not generator,
                             "validate": lambda x: x > 0, "null": False},
        "MCPileup": {"default": None, "type": str, "optional": True,
                     "validate": dataset, "null": False},
        "DataPileup": {"default": None, "type": str, "optional": True,
                       "validate": dataset, "null": False},
        "DeterministicPileup": {"default": False, "type": strToBool, "optional": True,
                                "validate": None, "attr": "deterministicPileup", "null": False},
        # processing tasks after the first one read from a sibling task instead
        "InputDataset": {"default": None, "type": str, "optional": generator or not firstTask,
                         "validate": dataset, "null": False},
        "InputTask": {"default": None, "type": str, "optional": firstTask,
                      "validate": None, "null": False},
        "InputFromOutputModule": {"default": None, "type": str, "optional": firstTask,
                                  "validate": None, "null": False},
        "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]), "null": False},
        "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]), "null": False},
        "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]), "null": False},
        "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]), "null": False},
        "SplittingAlgo": {"default": "EventAwareLumiBased", "type": str, "optional": True,
                          "validate": lambda x: x in ["EventBased", "LumiBased",
                                                      "EventAwareLumiBased", "FileBased"],
                          "null": False},
        "EventsPerJob": {"default": None, "type": int, "optional": True,
                         "validate": lambda x: x > 0, "null": False},
        "LumisPerJob": {"default": 8, "type": int, "optional": True,
                        "validate": lambda x: x > 0, "null": False},
        "FilesPerJob": {"default": 1, "type": int, "optional": True,
                        "validate": lambda x: x > 0, "null": False},
        "EventsPerLumi": {"default": None, "type": int, "optional": True,
                          "validate": lambda x: x > 0, "attr": "eventsPerLumi", "null": True},
        "FilterEfficiency": {"default": 1.0, "type": float, "optional": True,
                             "validate": lambda x: x > 0.0, "attr": "filterEfficiency", "null": False},
        "LheInputFiles": {"default": False, "type": strToBool, "optional": True,
                          "validate": None, "attr": "lheInputFiles", "null": False},
        "PrepID": {"default": None, "type": str, "optional": True,
                   "validate": None, "attr": "prepID", "null": True},
        "Multicore": {"default": None, "type": int, "optional": True,
                      "validate": lambda x: x > 0, "null": False},
    }
    StdBase.setDefaultArgumentsProperty(specArgs)
    return specArgs
def getWorkloadArguments():
    """
    _getWorkloadArguments_

    Argument definitions for a TaskChain workload: the number of chained
    tasks, couch/config-cache locations and the first event/lumi counters.
    Defaults are intended for testing.
    """
    baseArgs = StdBase.getWorkloadArguments()
    # include the ReqMgr-specific arguments as well
    baseArgs.update(StdBase.getWorkloadArgumentsWithReqMgr())
    specArgs = {
        "RequestType": {"default": "TaskChain", "optional": False, "attr": "requestType"},
        "GlobalTag": {"default": "GT_TC_V1", "type": str, "optional": False,
                      "validate": None, "attr": "globalTag", "null": False},
        "CouchURL": {"default": "http://localhost:5984", "type": str, "optional": False,
                     "validate": couchurl, "attr": "couchURL", "null": False},
        "CouchDBName": {"default": "dp_configcache", "type": str, "optional": False,
                        "validate": identifier, "attr": "couchDBName", "null": False},
        "ConfigCacheUrl": {"default": None, "type": str, "optional": True,
                           "validate": None, "attr": "configCacheUrl", "null": True},
        "IgnoredOutputModules": {"default": [], "type": makeList, "optional": True,
                                 "validate": None, "attr": "ignoredOutputModules", "null": False},
        "TaskChain": {"default": 1, "type": int, "optional": False,
                      "validate": lambda x: x > 0, "attr": "taskChain", "null": False},
        "FirstEvent": {"default": 1, "type": int, "optional": True,
                       "validate": lambda x: x > 0, "attr": "firstEvent", "null": False},
        "FirstLumi": {"default": 1, "type": int, "optional": True,
                      "validate": lambda x: x > 0, "attr": "firstLumi", "null": False},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadAssignArgs():
    """
    _getWorkloadAssignArgs_

    No assignment arguments beyond the StdBase defaults for this spec.
    """
    assignArgs = StdBase.getWorkloadAssignArgs()
    StdBase.setDefaultArgumentsProperty(assignArgs)
    return assignArgs
def getWorkloadCreateArgs():
    """
    Some default values set for testing purposes
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {
        "RequestType": {"default": "MonteCarlo", "optional": False},
        "PrimaryDataset": {"optional": False, "validate": primdataset,
                           "attr": "inputPrimaryDataset", "null": False},
        "Seeding": {"default": "AutomaticSeeding", "null": False,
                    "validate": lambda x: x in ["ReproducibleSeeding", "AutomaticSeeding"]},
        "FilterEfficiency": {"default": 1.0, "type": float, "null": False,
                             "validate": lambda x: x > 0.0},
        "RequestNumEvents": {"type": int, "null": False, "optional": False,
                             "validate": lambda x: x > 0},
        "FirstEvent": {"default": 1, "type": int, "validate": lambda x: x > 0, "null": False},
        "FirstLumi": {"default": 1, "type": int, "validate": lambda x: x > 0, "null": False},
        "MCPileup": {"validate": dataset, "attr": "mcPileup", "null": True},
        "DataPileup": {"validate": dataset, "attr": "dataPileup", "null": True},
        # MonteCarlo supports event-based splitting only
        "SplittingAlgo": {"default": "EventBased", "null": False,
                          "validate": lambda x: x in ["EventBased"],
                          "attr": "prodJobSplitAlgo"},
        "DeterministicPileup": {"default": False, "type": strToBool, "null": False},
        "EventsPerJob": {"type": int, "validate": lambda x: x > 0, "null": True},
        "EventsPerLumi": {"type": int, "validate": lambda x: x > 0, "null": True},
        "LheInputFiles": {"default": False, "type": strToBool, "null": False},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    _getWorkloadCreateArgs_

    Argument definitions for a StoreResults workload: an input dataset to be
    republished under a physics group with the USER data tier by default.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {
        "RequestType": {"default": "StoreResults", "optional": False},
        "InputDataset": {"optional": False, "validate": dataset, "null": False},
        "ConfigCacheID": {"optional": True, "null": True},
        "DataTier": {"default": "USER", "type": str, "optional": True,
                     "validate": None, "attr": "dataTier", "null": False},
        "PhysicsGroup": {"default": "", "optional": False, "null": False,
                         "validate": physicsgroup},
        "MergedLFNBase": {"default": "/store/results", "type": str, "optional": True,
                          "validate": None, "attr": "mergedLFNBase", "null": False},
        "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockBlacklist", "null": False},
        "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockWhitelist", "null": False},
        "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runBlacklist", "null": False},
        "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runWhitelist", "null": False},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    _getWorkloadCreateArgs_

    Argument definitions for a Tier-0 Express workload: reconstruction
    release/architecture, stream identification and the AlCa/DQM harvesting
    configuration, plus the input-rate limits for the stream.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {
        "RequestType": {"default": "Express"},
        "ConfigCacheID": {"optional": True, "null": True},
        "Scenario": {"optional": False, "attr": "procScenario"},
        # reconstruction release must be a known CMSSW release
        "RecoCMSSWVersion": {"validate": lambda x: x in releases(),
                             "optional": False, "attr": "recoFrameworkVersion"},
        "RecoScramArch": {"validate": lambda x: all([y in architectures() for y in x]),
                          "optional": False, "type": makeNonEmptyList},
        "GlobalTag": {"optional": False},
        "GlobalTagTransaction": {"optional": False},
        "ProcessingString": {"default": "", "validate": procstringT0},
        "StreamName": {"optional": False},
        "SpecialDataset": {"optional": False},
        "AlcaHarvestTimeout": {"type": int, "optional": False},
        "AlcaHarvestDir": {"optional": False, "null": True},
        "AlcaSkims": {"type": makeList, "optional": False},
        "DQMSequences": {"type": makeList, "attr": "dqmSequences", "optional": False},
        "Outputs": {"type": makeList, "optional": False},
        "MaxInputRate": {"type": int, "optional": False},
        "MaxInputEvents": {"type": int, "optional": False},
        "MaxInputSize": {"type": int, "optional": False},
        "MaxInputFiles": {"type": int, "optional": False},
        "MaxLatency": {"type": int, "optional": False},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    """
    _getWorkloadCreateArgs_

    Argument definitions for a Tier-0 Repack workload: output configuration
    and the various size/latency limits steering repacking decisions.
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {
        "RequestType": {"default": "Repack"},
        "ConfigCacheID": {"optional": True, "null": True},
        "Scenario": {"default": "fake", "attr": "procScenario"},
        "GlobalTag": {"default": "fake"},
        "ProcessingString": {"default": "", "validate": procstringT0},
        "Outputs": {"type": makeList, "optional": False},
        "MaxSizeSingleLumi": {"type": int, "optional": False},
        "MaxSizeMultiLumi": {"type": int, "optional": False},
        "MaxInputEvents": {"type": int, "optional": False},
        "MaxInputFiles": {"type": int, "optional": False},
        "MaxLatency": {"type": int, "optional": False},
        "MinInputSize": {"type": int, "optional": False},
        "MaxInputSize": {"type": int, "optional": False},
        "MaxEdmSize": {"type": int, "optional": False},
        "MaxOverSize": {"type": int, "optional": False},
    }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadArguments():
    """
    _getWorkloadArguments_

    Legacy-style MonteCarlo argument definitions: generator configuration,
    config-cache location, pileup inputs and per-job event counts.
    Defaults are intended for testing.
    """
    baseArgs = StdBase.getWorkloadArguments()
    specArgs = {
        "PrimaryDataset": {"default": "BlackHoleTest", "type": str, "optional": False,
                           "validate": primdataset, "attr": "inputPrimaryDataset", "null": False},
        "Seeding": {"default": "AutomaticSeeding", "type": str, "optional": True,
                    "validate": lambda x: x in ["ReproducibleSeeding", "AutomaticSeeding"],
                    "attr": "seeding", "null": False},
        "GlobalTag": {"default": "GT_MC_V1:All", "type": str, "optional": False,
                      "validate": None, "attr": "globalTag", "null": False},
        "ConfigCacheID": {"default": None, "type": str, "optional": False,
                          "validate": None, "attr": "configCacheID", "null": False},
        "CouchURL": {"default": "http://localhost:5984", "type": str, "optional": False,
                     "validate": couchurl, "attr": "couchURL", "null": False},
        "CouchDBName": {"default": "mc_configcache", "type": str, "optional": False,
                        "validate": identifier, "attr": "couchDBName", "null": False},
        "ConfigCacheUrl": {"default": None, "type": str, "optional": True,
                           "validate": None, "attr": "configCacheUrl", "null": False},
        "FilterEfficiency": {"default": 1.0, "type": float, "optional": True,
                             "validate": lambda x: x > 0.0, "attr": "filterEfficiency", "null": False},
        "RequestNumEvents": {"default": 1000, "type": int, "optional": False,
                             "validate": lambda x: x > 0, "attr": "requestNumEvents", "null": False},
        "FirstEvent": {"default": 1, "type": int, "optional": True,
                       "validate": lambda x: x > 0, "attr": "firstEvent", "null": False},
        "FirstLumi": {"default": 1, "type": int, "optional": True,
                      "validate": lambda x: x > 0, "attr": "firstLumi", "null": False},
        "MCPileup": {"default": None, "type": str, "optional": True,
                     "validate": dataset, "attr": "mcPileup", "null": False},
        "DataPileup": {"default": None, "type": str, "optional": True,
                       "validate": dataset, "attr": "dataPileup", "null": False},
        "EventsPerJob": {"default": None, "type": int, "optional": True,
                         "validate": lambda x: x > 0, "attr": "eventsPerJob", "null": True},
        "EventsPerLumi": {"default": None, "type": int, "optional": True,
                          "validate": lambda x: x > 0, "attr": "eventsPerLumi", "null": True},
        "LheInputFiles": {"default": False, "type": strToBool, "optional": True,
                          "validate": None, "attr": "lheInputFiles", "null": False},
    }
    baseArgs.update(specArgs)
    return baseArgs
def getWorkloadArguments():
    """
    _getWorkloadArguments_

    Argument definitions for a PromptReco workload: processing scenario,
    output tiers and skims, job-splitting configuration for both the
    reconstruction and skim steps, plus ReqMgr couch settings so the spec
    works both in the T0 and in ReqMgr.
    """
    baseArgs = StdBase.getWorkloadArguments()
    specArgs = {
        "RequestType": {"default": "PromptReco", "optional": True, "attr": "requestType"},
        "Scenario": {"default": None, "type": str, "optional": False,
                     "validate": None, "attr": "procScenario", "null": False},
        "GlobalTag": {"default": None, "type": str, "optional": False,
                      "validate": None, "attr": "globalTag", "null": False},
        "ProcessingString": {"default": "", "validate": procstringT0},
        "WriteTiers": {"default": ["RECO", "AOD", "DQM", "ALCARECO"],
                       "type": makeList, "optional": False,
                       "validate": None, "attr": "writeTiers", "null": False},
        "AlcaSkims": {"default": ["TkAlCosmics0T", "MuAlGlobalCosmics", "HcalCalHOCosmics"],
                      "type": makeList, "optional": False,
                      "validate": None, "attr": "alcaSkims", "null": False},
        "InputDataset": {"default": "/Cosmics/Run2012A-v1/RAW", "type": str, "optional": False,
                         "validate": dataset, "attr": "inputDataset", "null": False},
        "PhysicsSkims": {"default": [], "type": makeList, "optional": True,
                         "validate": None, "attr": "physicsSkims", "null": False},
        "InitCommand": {"default": None, "type": str, "optional": True,
                        "validate": None, "attr": "initCommand", "null": True},
        "EnvPath": {"default": None, "type": str, "optional": True,
                    "validate": None, "attr": "envPath", "null": True},
        "BinPath": {"default": None, "type": str, "optional": True,
                    "validate": None, "attr": "binPath", "null": True},
        "DoLogCollect": {"default": True, "type": strToBool, "optional": True,
                         "validate": None, "attr": "doLogCollect", "null": False},
        "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockBlacklist", "null": False},
        "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                           "validate": lambda x: all([block(y) for y in x]),
                           "attr": "blockWhitelist", "null": False},
        "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runBlacklist", "null": False},
        "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                         "validate": lambda x: all([int(y) > 0 for y in x]),
                         "attr": "runWhitelist", "null": False},
        # splitting for the reconstruction step
        "SplittingAlgo": {"default": "EventBased", "type": str, "optional": True,
                          "validate": lambda x: x in ["EventBased", "LumiBased",
                                                      "EventAwareLumiBased", "FileBased"],
                          "attr": "procJobSplitAlgo", "null": False},
        "EventsPerJob": {"default": 500, "type": int, "optional": True,
                         "validate": lambda x: x > 0, "attr": "eventsPerJob", "null": False},
        "LumisPerJob": {"default": 8, "type": int, "optional": True,
                        "validate": lambda x: x > 0, "attr": "lumisPerJob", "null": False},
        "FilesPerJob": {"default": 1, "type": int, "optional": True,
                        "validate": lambda x: x > 0, "attr": "filesPerJob", "null": False},
        # splitting for the skim step
        "SkimSplittingAlgo": {"default": "FileBased", "type": str, "optional": True,
                              "validate": lambda x: x in ["EventBased", "LumiBased",
                                                          "EventAwareLumiBased", "FileBased"],
                              "attr": "skimJobSplitAlgo", "null": False},
        "SkimEventsPerJob": {"default": 500, "type": int, "optional": True,
                             "validate": lambda x: x > 0, "attr": "skimEventsPerJob", "null": False},
        "SkimLumisPerJob": {"default": 8, "type": int, "optional": True,
                            "validate": lambda x: x > 0, "attr": "skimLumisPerJob", "null": False},
        "SkimFilesPerJob": {"default": 1, "type": int, "optional": True,
                            "validate": lambda x: x > 0, "attr": "skimFilesPerJob", "null": False},
        "BlockCloseDelay": {"default": 86400, "type": int, "optional": True,
                            "validate": lambda x: x > 0, "attr": "blockCloseDelay", "null": False},
    }
    baseArgs.update(specArgs)

    # add more optional arguments in case it is created using ReqMgr (not T0 case but should support both)
    reqMgrArguments = {
        "CouchURL": {"default": "https://cmsweb.cern.ch/couchdb", "validate": couchurl},
        "CouchDBName": {"default": "reqmgr_config_cache", "type": str, "validate": identifier},
        "ConfigCacheUrl": {"default": "https://cmsweb.cern.ch/couchdb", "validate": couchurl},
        "ConfigCacheID": {"optional": True, "null": True},
        "CouchWorkloadDBName": {"default": "reqmgr_workload_cache", "validate": identifier},
    }
    baseArgs.update(reqMgrArguments)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def modifyTaskConfiguration(self, taskConf, firstTask=False, generator=False):
    """
    _modifyTaskConfiguration_

    Modify the TaskConfiguration according to the specifications in
    getWorkloadCreateArgs and getChainCreateArgs. It does type casting and
    assigns default values if key is not present, unless default value is None.
    """
    taskArguments = self.getChainCreateArgs(firstTask, generator)
    for argument in taskArguments:
        # apply the argument default when missing; cast to the declared type otherwise
        if argument not in taskConf and taskArguments[argument]["default"] is not None:
            taskConf[argument] = taskArguments[argument]["default"]
        elif argument in taskConf:
            taskConf[argument] = taskArguments[argument]["type"](taskConf[argument])

    if generator:
        taskConf["SplittingAlgo"] = "EventBased"
        # Adjust totalEvents according to the filter efficiency
        # NOTE(review): FilterEfficiency/SizePerEvent lookups have no default;
        # assumes they were defaulted by the loop above — confirm.
        taskConf["RequestNumEvents"] = int(taskConf.get("RequestNumEvents", 0) / \
                                           taskConf.get("FilterEfficiency"))
        taskConf["SizePerEvent"] = taskConf.get("SizePerEvent", self.sizePerEvent) * \
                                   taskConf.get("FilterEfficiency")

    taskConf["SplittingArguments"] = {}
    if taskConf["SplittingAlgo"] in ["EventBased", "EventAwareLumiBased"]:
        # derive events per job/lumi from the requested totals and timing
        taskConf["EventsPerJob"], taskConf["EventsPerLumi"] = StdBase.calcEvtsPerJobLumi(
            taskConf.get("EventsPerJob"),
            taskConf.get("EventsPerLumi"),
            taskConf.get("TimePerEvent", self.timePerEvent),
            taskConf.get("RequestNumEvents"))
        if firstTask:
            # remember the first task's values so they can be fed back to couch
            self.eventsPerJob = taskConf["EventsPerJob"]
            self.eventsPerLumi = taskConf["EventsPerLumi"]
        taskConf["SplittingArguments"]["events_per_job"] = taskConf["EventsPerJob"]
        if taskConf["SplittingAlgo"] == "EventBased":
            taskConf["SplittingArguments"]["events_per_lumi"] = taskConf["EventsPerLumi"]
        else:
            taskConf["SplittingArguments"]["job_time_limit"] = 48 * 3600  # 2 days
        taskConf["SplittingArguments"]["lheInputFiles"] = taskConf["LheInputFiles"]
    elif taskConf["SplittingAlgo"] == "LumiBased":
        taskConf["SplittingArguments"]["lumis_per_job"] = taskConf["LumisPerJob"]
    elif taskConf["SplittingAlgo"] == "FileBased":
        taskConf["SplittingArguments"]["files_per_job"] = taskConf["FilesPerJob"]

    # only set if the caller did not already choose a value
    taskConf["SplittingArguments"].setdefault("include_parents", taskConf['IncludeParents'])

    taskConf["PileupConfig"] = parsePileupConfig(taskConf.get("MCPileup"),
                                                 taskConf.get("DataPileup"))
    # Adjust the pileup splitting
    taskConf["SplittingArguments"].setdefault("deterministicPileup",
                                              taskConf['DeterministicPileup'])

    return
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a TaskChain workload with the given parameters: build each
    Task1..TaskN in order, wiring parent/child tasks together, then record
    the task parentage mapping and feed the first task's computed splitting
    values back into the arguments for persistence.

    Fix: replaced Python-2-only `xrange` with `range` (behavior-identical
    for iteration and required for Python 3, consistent with the py2/3
    compatibility helpers already used elsewhere in this module).
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Detect blow-up factor from first task in chain: ratio of the summed
    # TimePerEvent across tasks to the first task's TimePerEvent.
    blowupFactor = 1
    if (self.taskChain > 1) and 'TimePerEvent' in arguments["Task1"]:
        origTpe = arguments["Task1"]['TimePerEvent']
        if origTpe <= 0:
            origTpe = 1.0
        sumTpe = 0
        tpeCount = 0
        for i in range(1, self.taskChain + 1):
            if 'TimePerEvent' in arguments["Task%d" % i]:
                sumTpe += arguments["Task%d" % i]['TimePerEvent']
                tpeCount += 1
        if tpeCount > 0:
            blowupFactor = sumTpe / origTpe

    for i in range(1, self.taskChain + 1):
        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v
        parent = taskConf.get("InputTask", None)

        # first task is a generator task when it has no input dataset
        self.modifyTaskConfiguration(taskConf, i == 1,
                                     i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'],
                                                      blowupFactor=blowupFactor)
                self.setupTask(task, taskConf)
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    # now that all tasks have been created, create the parent x output dataset map
    self.createTaskParentageMapping(arguments)
    self.workload.setTaskParentageMapping(self.taskOutputMapping)
    self.workload.ignoreOutputModules(self.ignoredOutputModules)
    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    # and push the parentage map to the reqmgr2 workload cache doc
    arguments['ChainParentageMap'] = self.workload.getChainParentageSimpleMapping()

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Task1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Task1']['EventsPerLumi'] = self.eventsPerLumi
    return self.workload
def __call__(self, workloadName, arguments):
    """
    __call__

    Create a StepChain workload with the given parameters.
    Configures the workload based on the first task information,
    then properly sets up the remaining tasks.

    :param workloadName: name for the new workload
    :param arguments: dict of request arguments, including "Step%d" sub-dicts
    :return: the configured workload (also stored on self.workload)
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    # Update the task configuration: shallow copy of Step1 so changes
    # below do not leak back into the caller's arguments dict.
    taskConf = {}
    for k, v in viewitems(arguments["Step1"]):
        taskConf[k] = v
    self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)
    self.inputPrimaryDataset = self.getStepValue('PrimaryDataset', taskConf,
                                                 self.primaryDataset)
    # Step1's filters become the workload-level global filters
    self.blockBlacklist = taskConf["BlockBlacklist"]
    self.blockWhitelist = taskConf["BlockWhitelist"]
    self.runBlacklist = taskConf["RunBlacklist"]
    self.runWhitelist = taskConf["RunWhitelist"]
    self.splittingAlgo = taskConf['SplittingAlgo']

    # Create the first task
    firstTask = self.workload.newTask(taskConf['StepName'])

    # it has to be called before the other steps are created
    self.createStepMappings(arguments)

    # Create a proper task and set workload level arguments
    if isGenerator(arguments):
        # no input dataset: this is an MC-from-scratch (generator) request
        self.workload.setDashboardActivity("production")
        self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                              taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.workload.setEndPolicy("SingleShot")
        self.setupGeneratorTask(firstTask, taskConf)
    else:
        # processing an existing dataset, split by block
        self.workload.setDashboardActivity("processing")
        self.workload.setWorkQueueSplitPolicy("Block",
                                              taskConf['SplittingAlgo'],
                                              taskConf['SplittingArguments'])
        self.setupTask(firstTask, taskConf)

    # Now modify this task to add the next steps
    if self.stepChain > 1:
        self.setupNextSteps(firstTask, arguments)

    self.createStepParentageMappings(firstTask, arguments)

    self.workload.setStepMapping(self.stepMapping)
    self.workload.setStepParentageMapping(self.stepParentageMapping)
    # and push the parentage map to the reqmgr2 workload cache doc
    arguments['ChainParentageMap'] = self.workload.getChainParentageSimpleMapping()

    # Feed values back to save in couch
    if self.eventsPerJob:
        arguments['Step1']['EventsPerJob'] = self.eventsPerJob
    if self.eventsPerLumi:
        arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
    return self.workload
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a StoreResults workload with the given parameters.

    Builds a single merge task ("StoreResults") over the input dataset,
    with stage-out, log-archive and log-collect steps attached.

    :param workloadName: name for the new workload
    :param arguments: dict of request arguments; 'MergedLFNBase' and
        'PhysicsGroup' must be present (MergedLFNBase is mutated here)
    :return: the configured workload
    """
    # first of all, we update the merged LFN based on the physics group
    arguments['MergedLFNBase'] += "/" + arguments['PhysicsGroup'].lower()
    StdBase.__call__(self, workloadName, arguments)

    # inputDataset is "/primary/processed/TIER"; strip the leading slash
    # before splitting into its three components
    (inputPrimaryDataset, inputProcessedDataset,
     inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()

    mergeTask = workload.newTask("StoreResults")
    self.addDashboardMonitoring(mergeTask)
    mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
    mergeTaskCmssw.setStepType("CMSSW")

    mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
    mergeTaskStageOut.setStepType("StageOut")
    mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
    mergeTaskLogArch.setStepType("LogArchive")

    self.addLogCollectTask(mergeTask, taskName="StoreResultsLogCollect")

    mergeTask.setTaskType("Merge")
    mergeTask.applyTemplates()

    # attach the input dataset together with block/run black/white lists
    mergeTask.addInputDataset(name=self.inputDataset,
                              primary=inputPrimaryDataset,
                              processed=inputProcessedDataset,
                              tier=inputDataTier,
                              dbsurl=self.dbsUrl,
                              block_blacklist=self.blockBlacklist,
                              block_whitelist=self.blockWhitelist,
                              run_blacklist=self.runBlacklist,
                              run_whitelist=self.runWhitelist)

    splitAlgo = "ParentlessMergeBySize"
    mergeTask.setSplittingAlgorithm(splitAlgo,
                                    max_merge_size=self.maxMergeSize,
                                    min_merge_size=self.minMergeSize,
                                    max_merge_events=self.maxMergeEvents)

    mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
    mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion,
                                    softwareEnvironment="",
                                    scramArch=self.scramArch)
    mergeTaskCmsswHelper.setGlobalTag(self.globalTag)
    # merge jobs should not die on individual unreadable input files
    mergeTaskCmsswHelper.setSkipBadFiles(True)
    mergeTaskCmsswHelper.setDataProcessingConfig("do_not_use", "merge")

    self.addOutputModule(mergeTask, "Merged",
                         primaryDataset=inputPrimaryDataset,
                         dataTier=self.dataTier,
                         filterName=None,
                         forceMerged=True)

    workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase)
    workload.setDashboardActivity("StoreResults")
    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    workload.setTaskPropertiesFromWorkload()
    self.reportWorkflowToDashboard(workload.getDashboardActivity())

    return workload
def getWorkloadArguments():
    """
    Return the argument specification for this workload type.

    Starts from the StdBase argument set and overlays the spec-specific
    arguments (input dataset, couch config, block/run filters and job
    splitting knobs).

    Fix: "EventsPerJob" declared `"default": None` together with
    `"null": False`, which is contradictory (the default itself is null)
    and inconsistent with the same argument elsewhere in this file, where
    it is declared `"null": True`. Changed to `"null": True`.

    :return: dict mapping argument name -> specification dict
    """
    baseArgs = StdBase.getWorkloadArguments()
    specArgs = {"InputDataset": {"default": "/MinimumBias/ComissioningHI-v1/RAW",
                                 "type": str, "optional": False, "validate": dataset,
                                 "attr": "inputDataset", "null": False},
                "GlobalTag": {"default": "GT_DP_V1", "type": str,
                              "optional": False, "validate": None,
                              "attr": "globalTag", "null": False},
                "CouchURL": {"default": "http://localhost:5984", "type": str,
                             "optional": False, "validate": couchurl,
                             "attr": "couchURL", "null": False},
                "CouchDBName": {"default": "dp_configcache", "type": str,
                                "optional": False, "validate": identifier,
                                "attr": "couchDBName", "null": False},
                "ConfigCacheUrl": {"default": None, "type": str,
                                   "optional": True, "validate": None,
                                   "attr": "configCacheUrl", "null": True},
                "OpenRunningTimeout": {"default": 0, "type": int,
                                       "optional": True, "validate": lambda x: x >= 0,
                                       "attr": "openRunningTimeout", "null": False},
                "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                                   "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockBlacklist", "null": False},
                "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                                   "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockWhitelist", "null": False},
                "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                                 "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runBlacklist", "null": False},
                "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                                 "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runWhitelist", "null": False},
                "SplittingAlgo": {"default": "EventAwareLumiBased", "type": str,
                                  "optional": True,
                                  "validate": lambda x: x in ["EventBased", "LumiBased",
                                                              "EventAwareLumiBased", "FileBased"],
                                  "attr": "procJobSplitAlgo", "null": False},
                # null must be True here: the default itself is None
                "EventsPerJob": {"default": None, "type": int, "optional": True,
                                 "validate": lambda x: x > 0,
                                 "attr": "eventsPerJob", "null": True},
                "LumisPerJob": {"default": 8, "type": int, "optional": True,
                                "validate": lambda x: x > 0,
                                "attr": "lumisPerJob", "null": False},
                "FilesPerJob": {"default": 1, "type": int, "optional": True,
                                "validate": lambda x: x > 0,
                                "attr": "filesPerJob", "null": False}
                }
    baseArgs.update(specArgs)
    return baseArgs
def __init__(self):
    """Initialize the spec with multicore processing disabled by default."""
    StdBase.__init__(self)
    # single-core defaults; callers may override via request arguments
    self.multicoreNCores = 1
    self.multicore = False
def getWorkloadCreateArgs():
    """
    Return the creation-time argument specification for this workload type.

    Starts from the StdBase creation arguments and overlays spec-specific
    entries (input dataset, scenario, block/run filters and job splitting
    parameters).

    :return: dict mapping argument name -> specification dict
    """
    baseArgs = StdBase.getWorkloadCreateArgs()
    specArgs = {"InputDataset": {"optional": False, "validate": dataset, "null": False},
                "Scenario": {"optional": True, "null": True, "attr": "procScenario"},
                "PrimaryDataset": {"optional": True, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": True},
                # run lists must be positive integers
                "RunBlacklist": {"default": [], "type": makeList, "null": False,
                                 "validate": lambda x: all([int(y) > 0 for y in x])},
                "RunWhitelist": {"default": [], "type": makeList, "null": False,
                                 "validate": lambda x: all([int(y) > 0 for y in x])},
                # block lists must be valid DBS block names
                "BlockBlacklist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "BlockWhitelist": {"default": [], "type": makeList,
                                   "validate": lambda x: all([block(y) for y in x])},
                "SplittingAlgo": {"default": "EventAwareLumiBased", "null": False,
                                  "validate": lambda x: x in ["EventBased", "LumiBased",
                                                              "EventAwareLumiBased", "FileBased"],
                                  "attr": "procJobSplitAlgo"},
                "EventsPerJob": {"type": int, "validate": lambda x: x > 0, "null": True},
                "LumisPerJob": {"default": 8, "type": int, "null": False,
                                "validate": lambda x: x > 0},
                "FilesPerJob": {"default": 1, "type": int, "null": False,
                                "validate": lambda x: x > 0}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __init__(self):
    """Set up empty bookkeeping maps for task and merge-module lookups."""
    StdBase.__init__(self)
    # filled in as tasks are created during __call__
    self.taskMapping = {}
    self.mergeMapping = {}
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.

    NOTE(review): despite the docstring, this builds a chained-task
    ("Task%d") workload — verify against the enclosing spec class.

    :param workloadName: name for the new workload
    :param arguments: dict of request arguments, including "Task%d" sub-dicts
    :return: the configured workload (also stored on self.workload)
    """
    StdBase.__call__(self, workloadName, arguments)
    self.workload = self.createWorkload()

    for i in range(1, self.taskChain + 1):

        originalTaskConf = arguments["Task%d" % i]
        taskConf = {}
        # Make a shallow copy of the taskConf
        for k, v in originalTaskConf.items():
            taskConf[k] = v
        parent = taskConf.get("InputTask", None)

        self.modifyTaskConfiguration(taskConf, i == 1,
                                     i == 1 and 'InputDataset' not in taskConf)

        # Set task-specific global parameters
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]

        # Multicore may arrive as the string 'None'; only numeric values count
        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])

        parentTask = None
        if parent in self.mergeMapping:
            parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

        task = self.makeTask(taskConf, parentTask)

        if i == 1:
            # First task will either be generator or processing
            self.workload.setDashboardActivity("relval")
            if isGenerator(arguments):
                # generate mc events
                self.workload.setWorkQueueSplitPolicy("MonteCarlo",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.workload.setEndPolicy("SingleShot")
                self.setupGeneratorTask(task, taskConf)
            else:
                # process an existing dataset
                self.workload.setWorkQueueSplitPolicy("Block",
                                                      taskConf['SplittingAlgo'],
                                                      taskConf['SplittingArguments'])
                self.setupTask(task, taskConf)
            self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        else:
            # all subsequent tasks have to be processing tasks
            self.setupTask(task, taskConf)
        self.taskMapping[task.name()] = taskConf

    self.workload.ignoreOutputModules(self.ignoredOutputModules)

    return self.workload
class ReqMgrService(TemplatedPage):
    """
    Request Manager web service class

    CherryPy-exposed web UI for ReqMgr2: serves templated pages for request
    creation/approval/assignment, batches, and static css/js/image assets.
    """

    def __init__(self, app, config, mount):
        """
        Initialize directories, template maps, CherryPy config and
        connections to the reqmgr2 / wmstats / LogDB services.

        :param app: application object (unused here)
        :param config: service configuration (object with .dictionary_() or dict)
        :param mount: mount point (unused here)
        """
        self.base = config.base
        self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1])
        # NOTE(review): if config is a truthy dict, neither branch below runs
        # and web_config stays unbound (NameError) — confirm intended inputs.
        if config and not isinstance(config, dict):
            web_config = config.dictionary_()
        if not config:
            web_config = {'base': self.base}
        TemplatedPage.__init__(self, web_config)
        # resolve asset directories: environment overrides cwd defaults,
        # web_config overrides the environment
        imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images')
        self.imgdir = web_config.get('imgdir', imgdir)
        cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css')
        self.cssdir = web_config.get('cssdir', cssdir)
        jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js')
        self.jsdir = web_config.get('jsdir', jsdir)
        spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs')
        self.spdir = web_config.get('spdir', spdir)
        # read scripts area and initialize data-ops scripts
        self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts')
        self.sdir = web_config.get('sdir', self.sdir)
        self.sdict_thr = web_config.get('sdict_thr',
                                        600)  # put reasonable 10 min interval
        self.sdict = {'ts': time.time()}  # placeholder for data-ops scripts
        self.update_scripts(force=True)

        # To be filled at run time
        self.cssmap = {}
        self.jsmap = {}
        self.imgmap = {}
        self.yuimap = {}

        std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs')
        self.std_specs = spec_list(std_specs_dir)
        self.std_specs.sort()

        # Update CherryPy configuration
        mime_types = ['text/css']
        mime_types += ['application/javascript', 'text/javascript',
                       'application/x-javascript', 'text/x-javascript']
        cherryconf.update({'tools.encode.on': True,
                           'tools.gzip.on': True,
                           'tools.gzip.mime_types': mime_types,
                           })
        self._cache = {}

        # initialize access to reqmgr2 APIs
        self.reqmgr_url = config.reqmgr.reqmgr2_url
        self.reqmgr = ReqMgr(self.reqmgr_url)
        # only gets current view (This might cause to reponse time much longer,
        # If upto date view is not needed overwrite Fale)
        self.reqmgr._noStale = True

        # get fields which we'll use in templates
        cdict = config.reqmgr.dictionary_()
        self.couch_url = cdict.get('couch_host', '')
        self.couch_dbname = cdict.get('couch_reqmgr_db', '')
        self.couch_wdbname = cdict.get('couch_workload_summary_db', '')
        self.acdc_url = cdict.get('acdc_host', '')
        self.acdc_dbname = cdict.get('acdc_db', '')
        self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url)
        self.dbs_url = cdict.get('dbs_url', '')
        self.dqm_url = cdict.get('dqm_url', '')
        self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1')
        self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493')

        # LogDB holder
        centralurl = cdict.get("central_logdb_url", "")
        identifier = cdict.get("log_reporter", "reqmgr2")
        self.logdb = LogDB(centralurl, identifier)

        # local team cache which will request data from wmstats
        base, uri = self.reqmgr_url.split('://')
        base_url = '%s://%s' % (base, uri.split('/')[0])
        self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url)
        if not self.wmstatsurl:
            raise Exception('ReqMgr2 configuration file does not provide wmstats url')
        self.team_cache = []

        # fetch assignment arguments specification from StdBase
        self.assignArgs = StdBase().getWorkloadAssignArgs()
        # keep only the default value of each assignment argument
        self.assignArgs = {key: val['default'] for key, val in self.assignArgs.items()}

    def getTeams(self):
        "Helper function to get teams from wmstats or local cache"
        teams = self.team_cache
        url = '%s/data/teams' % self.wmstatsurl
        params = {}
        headers = {'Accept': 'application/json'}
        try:
            data = getdata(url, params, headers)
            if 'error' in data:
                print("WARNING: fail to get teams from %s" % url)
                print(data)
            teams = data.get('result', [])
            self.team_cache = teams
        except Exception as exp:
            # best-effort: on any failure fall back to the cached team list
            print("WARNING: fail to get teams from %s" % url)
            print(str(exp))
        return teams

    def update_scripts(self, force=False):
        "Update scripts dict"
        # reload script files only when forced or the cache is older
        # than sdict_thr seconds
        if force or abs(time.time() - self.sdict['ts']) > self.sdict_thr:
            for item in os.listdir(self.sdir):
                with open(os.path.join(self.sdir, item), 'r') as istream:
                    self.sdict[item.split('.')[0]] = istream.read()
            self.sdict['ts'] = time.time()

    def abs_page(self, tmpl, content):
        """Generate abstract page: menu + generic body wrapped in main template."""
        menu = self.templatepage('menu', menus=menus(), tmpl=tmpl)
        body = self.templatepage('generic', menu=menu, content=content)
        page = self.templatepage('main', content=body, user=user())
        return page

    def page(self, content):
        """
        Provide page wrapped with top/bottom templates.
        """
        return self.templatepage('main', content=content)

    def error(self, content):
        "Generate common error page"
        content = self.templatepage('error', content=content)
        return self.abs_page('error', content)

    @expose
    def index(self):
        """Main page"""
        content = self.templatepage('index', requests=ACTIVE_STATUS,
                                    rdict=REQUEST_STATE_TRANSITION)
        return self.abs_page('main', content)

    @expose
    def home(self, **kwds):
        """Main page"""
        # NOTE(review): index() accepts no keyword arguments, so any
        # non-empty kwds here would raise TypeError — confirm callers.
        return self.index(**kwds)

    ### Request actions ###

    @expose
    @checkargs(['status', 'sort'])
    def assign(self, **kwds):
        """assign page"""
        if not kwds:
            kwds = {}
        if 'status' not in kwds:
            kwds.update({'status': 'assignment-approved'})
        docs = []
        attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor',
                 'RequestStatus']
        dataResult = self.reqmgr.getRequestByStatus(statusList=[kwds['status']])
        for data in dataResult:
            for val in data.values():
                docs.append(request_attr(val, attrs))
        sortby = kwds.get('sort', 'status')
        docs = [r for r in sort(docs, sortby)]
        # copy defaults so per-request edits never mutate self.assignArgs
        assignDict = deepcopy(self.assignArgs)
        assignDict.update(getPropValueMap())
        assignDict['Team'] = self.getTeams()
        filter_sort = self.templatepage('filter_sort')
        content = self.templatepage('assign', sort=sortby,
                                    filter_sort_table=filter_sort,
                                    sites=SITE_CACHE.getData(),
                                    site_white_list=site_white_list(),
                                    site_black_list=site_black_list(),
                                    user=user(), user_dn=user_dn(),
                                    requests=toString(docs),
                                    misc_table=json2table(assignDict,
                                                          web_ui_names(),
                                                          "all_attributes"),
                                    misc_json=json2form(assignDict,
                                                        indent=2,
                                                        keep_first_value=True))
        return self.abs_page('assign', content)

    @expose
    @checkargs(['status', 'sort'])
    def approve(self, **kwds):
        """
        Approve page: get list of request associated with user DN.
        Fetch their status list from ReqMgr and display if requests
        were seen by data-ops.
        """
        if not kwds:
            kwds = {}
        if 'status' not in kwds:
            kwds.update({'status': 'new'})
        kwds.update({'_nostale': True})
        docs = []
        attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor',
                 'RequestStatus', 'Campaign']
        dataResult = self.reqmgr.getRequestByStatus(statusList=[kwds['status']])
        for data in dataResult:
            for val in data.values():
                docs.append(request_attr(val, attrs))
        sortby = kwds.get('sort', 'status')
        docs = [r for r in sort(docs, sortby)]
        filter_sort = self.templatepage('filter_sort')
        content = self.templatepage('approve', requests=toString(docs),
                                    date=tstamp(), sort=sortby,
                                    filter_sort_table=filter_sort,
                                    gen_color=gen_color)
        return self.abs_page('approve', content)

    @expose
    def create(self, **kwds):
        """create page"""
        # get list of standard specs from WMCore and new ones from local area
        # loc_specs_dir = os.path.join(self.spdir, 'Specs') # local specs
        # loc_specs = spec_list(loc_specs_dir, 'Specs')
        # all_specs = list(set(self.std_specs + loc_specs))
        # all_specs.sort()
        all_specs = list(self.std_specs)
        spec = kwds.get('form', '')
        if not spec:
            spec = self.std_specs[0]
        # make spec first in all_specs list
        if spec in all_specs:
            all_specs.remove(spec)
        all_specs = [spec] + all_specs
        jsondata = get_request_template_from_type(spec)
        # create templatized page out of provided forms
        self.update_scripts()
        content = self.templatepage('create',
                                    table=json2table(jsondata,
                                                     web_ui_names(),
                                                     jsondata),
                                    jsondata=json2form(jsondata, indent=2,
                                                       keep_first_value=True),
                                    name=spec,
                                    scripts=[s for s in self.sdict.keys()
                                             if s != 'ts'],
                                    specs=all_specs)
        return self.abs_page('create', content)

    def generate_objs(self, script, jsondict):
        """Generate objects from given JSON template"""
        self.update_scripts()
        code = self.sdict.get(script, '')
        if code.find('def genobjs(jsondict)') == -1:
            return self.error(
                "Improper python snippet, your code should start with <b>def genobjs(jsondict)</b> function"
            )
        # NOTE(review): exec of operator-supplied script text — trusted-input
        # assumption; confirm scripts area is admin-controlled.
        exec(code)  # code snippet must starts with genobjs function
        return [r for r in genobjs(jsondict)]

    @expose
    def config(self, name):
        "Fetch config for given request name"
        result = self.reqmgr.getConfig(name)
        if len(result) == 1:
            result = result[0]
        else:
            result = 'Configuration not found for: %s' % name
        return result.replace('\n', '<br/>')

    @expose
    def fetch(self, rid):
        "Fetch document for given id"
        rid = rid.replace('request-', '')
        doc = self.reqmgr.getRequestByNames(rid)
        transitions = []
        tst = time.time()
        # get request tasks
        tasks = self.reqmgr.getRequestTasks(rid)
        if len(doc) == 1:
            # NOTE(review): bare except silently keeps doc as the raw list
            # when the rid key is absent — consider narrowing to KeyError.
            try:
                doc = doc[0][rid]
            except:
                pass
            name = doc.get('RequestName', 'NA')
            title = 'Request %s' % name
            status = doc.get('RequestStatus', '')
            transitions = REQUEST_STATE_TRANSITION.get(status, [])
            if status in transitions:
                transitions.remove(status)
            visible_attrs = get_modifiable_properties(status)
            filterout_attrs = get_protected_properties()
            # extend filterout list with "RequestStatus" since it is passed separately
            filterout_attrs.append("RequestStatus")
            # backfill missing/falsy fields with assignment defaults
            for key, val in self.assignArgs.items():
                if not doc.get(key):
                    doc[key] = val
            if visible_attrs == "all_attributes":
                filteredDoc = doc
                for prop in filterout_attrs:
                    if prop in filteredDoc:
                        del filteredDoc[prop]
            else:
                filteredDoc = {}
                for prop in visible_attrs:
                    filteredDoc[prop] = doc.get(prop, "")
            propValueMap = getPropValueMap()
            propValueMap['Team'] = self.getTeams()
            selected = {}
            for prop in propValueMap:
                if prop in filteredDoc:
                    filteredDoc[prop], selected[prop] = reorder_list(
                        propValueMap[prop], filteredDoc[prop])
            content = self.templatepage(
                'doc', title=title, status=status, name=name, rid=rid,
                tasks=json2form(tasks, indent=2, keep_first_value=False),
                table=json2table(filteredDoc, web_ui_names(), visible_attrs,
                                 selected),
                jsondata=json2form(doc, indent=2, keep_first_value=False),
                doc=json.dumps(doc), time=time,
                tasksConfigs=tasks_configs(doc, html=True),
                sTransition=state_transition(doc),
                pTransition=priority_transition(doc),
                transitions=transitions, ts=tst, user=user(),
                userdn=user_dn())
        elif len(doc) > 1:
            jsondata = [pprint.pformat(d) for d in doc]
            content = self.templatepage('doc',
                                        title='Series of docs: %s' % rid,
                                        table="", jsondata=jsondata,
                                        time=time,
                                        tasksConfigs=tasks_configs(doc, html=True),
                                        sTransition=state_transition(doc),
                                        pTransition=priority_transition(doc),
                                        transitions=transitions, ts=tst,
                                        user=user(), userdn=user_dn())
        else:
            # NOTE(review): this branch sets doc but not content; the return
            # below would raise NameError for an empty result — confirm.
            doc = 'No request found for name=%s' % rid
        return self.abs_page('request', content)

    @expose
    def record2logdb(self, **kwds):
        """LogDB submission page"""
        print(kwds)
        request = kwds['request']
        msg = kwds['message']
        self.logdb.post(request, msg)
        msg = '<h6>Confirmation</h6>Your request has been entered to LogDB.'
        return self.abs_page('generic', msg)

    @expose
    def requests(self, **kwds):
        """Page showing requests"""
        if not kwds:
            kwds = {}
        if 'status' not in kwds:
            kwds.update({'status': 'acquired'})
        dataResult = self.reqmgr.getRequestByStatus(kwds['status'])
        attrs = ['RequestName', 'RequestDate', 'Group', 'Requestor',
                 'RequestStatus', 'Campaign']
        docs = []
        for data in dataResult:
            for doc in data.values():
                docs.append(request_attr(doc, attrs))
        sortby = kwds.get('sort', 'status')
        docs = [r for r in sort(docs, sortby)]
        filter_sort = self.templatepage('filter_sort')
        content = self.templatepage('requests', requests=toString(docs),
                                    sort=sortby, status=kwds['status'],
                                    filter_sort_table=filter_sort)
        return self.abs_page('requests', content)

    @expose
    def request(self, **kwargs):
        "Get data example and expose it as json"
        dataset = kwargs.get('uinput', '')
        if not dataset:
            return {'error': 'no input dataset'}
        url = 'https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s' % dataset
        params = {}
        headers = {'Accept': 'application/json'}
        wdata = getdata(url, params, headers)
        wdict = dict(date=time.ctime(), team='Team-A', status='Running',
                     ID=genid(wdata))
        winfo = self.templatepage('workflow', wdict=wdict, dataset=dataset,
                                  code=pprint.pformat(wdata))
        content = self.templatepage('search', content=winfo)
        return self.abs_page('request', content)

    @expose
    def batch(self, **kwds):
        """batch page"""
        # TODO: we need a template for batch attributes
        # and read it from separate area, like DASMaps
        name = kwds.get('name', '')
        batch = {}
        if name:
            # batch = self.reqmgr.getBatchesByName(name)
            # hard-coded placeholder until getBatchesByName is wired up
            batch = {'Name': 'Batch1', 'Description': 'Bla-bla',
                     'Creator': 'valya', 'Group': 'test',
                     'Workflows': ['workflow1', 'workflow2'],
                     'Attributes': {'HeavyIon': ['true', 'false']}}
        attributes = batch.get('Attributes', {})
        workflows = batch.get('Workflows', [])
        description = batch.get('Description', '')
        creator = batch.get('Creator', user_dn())
        content = self.templatepage('batch', name=name,
                                    attributes=json2table(attributes,
                                                          web_ui_names()),
                                    workflows=workflows, creator=creator,
                                    description=description)
        return self.abs_page('batch', content)

    @expose
    def batches(self, **kwds):
        """Page showing batches"""
        if not kwds:
            kwds = {}
        if 'name' not in kwds:
            kwds.update({'name': ''})
        sortby = kwds.get('sort', 'name')
        # results = self.reqmgr.getBatchesByName(kwds['name'])
        # hard-coded placeholder until getBatchesByName is wired up
        results = [
            {'Name': 'Batch1', 'Description': 'Bla-bla', 'Creator': 'valya',
             'Group': 'test', 'Workflows': ['workflow1', 'workflow2'],
             'Date': 'Fri Feb 13 10:36:41 EST 2015',
             'Attributes': {'HeavyIon': ['true', 'false']}},
            {'Name': 'Batch2', 'Description': 'lksdjflksjdf',
             'Creator': 'valya', 'Group': 'test',
             'Workflows': ['workflow1', 'workflow2'],
             'Date': 'Fri Feb 10 10:36:41 EST 2015',
             'Attributes': {'HeavyIon': ['true', 'false']}},
        ]
        docs = [r for r in sort(results, sortby)]
        filter_sort = self.templatepage('filter_sort')
        content = self.templatepage('batches', batches=docs, sort=sortby,
                                    filter_sort_table=filter_sort)
        return self.abs_page('batches', content)

    ### Aux methods ###

    @expose
    def put_request(self, **kwds):
        "PUT request callback to reqmgr server, should be used in AJAX"
        reqname = kwds.get('RequestName', '')
        status = kwds.get('RequestStatus', '')
        if not reqname:
            msg = 'Unable to update request status, empty request name'
            raise cherrypy.HTTPError(406, msg)
        if not status:
            msg = 'Unable to update request status, empty status value'
            raise cherrypy.HTTPError(406, msg)
        return self.reqmgr.updateRequestStatus(reqname, status)

    @expose
    def images(self, *args):
        """
        Serve static images.
        """
        args = list(args)
        check_scripts(args, self.imgmap, self.imgdir)
        mime_types = ['*/*', 'image/gif', 'image/png', 'image/jpg',
                      'image/jpeg']
        accepts = cherrypy.request.headers.elements('Accept')
        for accept in accepts:
            if accept.value in mime_types and len(args) == 1 \
                    and args[0] in self.imgmap:
                image = self.imgmap[args[0]]
                # use image extension to pass correct content type
                ctype = 'image/%s' % image.split('.')[-1]
                cherrypy.response.headers['Content-type'] = ctype
                return serve_file(image, content_type=ctype)

    def serve(self, kwds, imap, idir, datatype='', minimize=False):
        "Serve files for high level APIs (yui/css/js)"
        args = []
        for key, val in kwds.items():
            if key == 'f':  # we only look-up files from given kwds dict
                if isinstance(val, list):
                    args += val
                else:
                    args.append(val)
        scripts = check_scripts(args, imap, idir)
        return self.serve_files(args, scripts, imap, datatype, minimize)

    @exposecss
    @tools.gzip()
    def css(self, **kwargs):
        """
        Serve provided CSS files. They can be passed as
        f=file1.css&f=file2.css
        """
        resource = kwargs.get('resource', 'css')
        if resource == 'css':
            return self.serve(kwargs, self.cssmap, self.cssdir, 'css', True)

    @exposejs
    @tools.gzip()
    def js(self, **kwargs):
        """
        Serve provided JS scripts. They can be passed as
        f=file1.js&f=file2.js with optional resource parameter to
        specify type of JS files, e.g. resource=yui.
        """
        resource = kwargs.get('resource', 'js')
        if resource == 'js':
            return self.serve(kwargs, self.jsmap, self.jsdir)

    def serve_files(self, args, scripts, resource, datatype='',
                    minimize=False):
        """
        Return asked set of files for JS, YUI, CSS.
        Concatenated content is cached in self._cache keyed by script list.
        """
        idx = "-".join(scripts)
        if idx not in self._cache.keys():
            data = ''
            if datatype == 'css':
                data = '@CHARSET "UTF-8";'
            for script in args:
                path = os.path.join(sys.path[0], resource[script])
                path = os.path.normpath(path)
                ifile = open(path)
                # strip per-file charset declarations; one was added above
                data = "\n".join([data, ifile.read(). \
                    replace('@CHARSET "UTF-8";', '')])
                ifile.close()
            if datatype == 'css':
                set_headers("text/css")
            if minimize:
                self._cache[idx] = minify(data)
            else:
                self._cache[idx] = data
        return self._cache[idx]
def getWorkloadArguments():
    """
    Return the argument specification for this workload type.

    Combines StdBase arguments, the ReqMgr-specific arguments, and the
    spec-specific entries below (input dataset, block/run filters and job
    splitting knobs).

    :return: dict mapping argument name -> specification dict
    """
    baseArgs = StdBase.getWorkloadArguments()
    reqMgrArgs = StdBase.getWorkloadArgumentsWithReqMgr()
    baseArgs.update(reqMgrArgs)
    specArgs = {"InputDataset": {"default": "/MinimumBias/ComissioningHI-v1/RAW",
                                 "type": str, "optional": False, "validate": dataset,
                                 "attr": "inputDataset", "null": False},
                "GlobalTag": {"default": "GT_DP_V1", "type": str,
                              "optional": False, "validate": None,
                              "attr": "globalTag", "null": False},
                "OpenRunningTimeout": {"default": 0, "type": int,
                                       "optional": True, "validate": lambda x: x >= 0,
                                       "attr": "openRunningTimeout", "null": False},
                # block lists must be valid DBS block names
                "BlockBlacklist": {"default": [], "type": makeList, "optional": True,
                                   "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockBlacklist", "null": False},
                "BlockWhitelist": {"default": [], "type": makeList, "optional": True,
                                   "validate": lambda x: all([block(y) for y in x]),
                                   "attr": "blockWhitelist", "null": False},
                # run lists must be positive integers
                "RunBlacklist": {"default": [], "type": makeList, "optional": True,
                                 "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runBlacklist", "null": False},
                "RunWhitelist": {"default": [], "type": makeList, "optional": True,
                                 "validate": lambda x: all([int(y) > 0 for y in x]),
                                 "attr": "runWhitelist", "null": False},
                "SplittingAlgo": {"default": "EventAwareLumiBased", "type": str,
                                  "optional": True,
                                  "validate": lambda x: x in ["EventBased", "LumiBased",
                                                              "EventAwareLumiBased", "FileBased"],
                                  "attr": "procJobSplitAlgo", "null": False},
                "EventsPerJob": {"default": None, "type": int, "optional": True,
                                 "validate": lambda x: x > 0,
                                 "attr": "eventsPerJob", "null": True},
                "LumisPerJob": {"default": 8, "type": int, "optional": True,
                                "validate": lambda x: x > 0,
                                "attr": "lumisPerJob", "null": False},
                "FilesPerJob": {"default": 1, "type": int, "optional": True,
                                "validate": lambda x: x > 0,
                                "attr": "filesPerJob", "null": False}
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadArguments():
    """
    Return the argument specification for the Express workload type.

    If arg is not specifyed, automatically set by following default value

    - default: None
    - type: str
    - optional: True
    - assign_optional: True
    - validate: None
    - attr: change first letter to lower case
    - null: False

    :return: dict mapping argument name -> specification dict
    """
    baseArgs = StdBase.getWorkloadArguments()
    ## specArgs = {"Outputs" : {"default" : {}, "type" : dict,
    ##                          "optional" : False, "validate" : None,
    ##                          "attr" : "outputs", "null" : False},
    specArgs = {"RequestType": {"default": "Express"},
                "Scenario": {"optional": False, "attr": "procScenario"},
                "RecoCMSSWVersion": {"optional": False, "validate": cmsswversion,
                                     "attr": "recoFrameworkVersion", "null": True},
                "RecoScramArch": {"optional": False, "null": True},
                "GlobalTag": {"optional": False},
                "GlobalTagTransaction": {"optional": False},
                "StreamName": {"optional": False},
                "SpecialDataset": {"optional": False},
                "AlcaHarvestTimeout": {"type": int, "optional": False},
                "AlcaHarvestDir": {"optional": False, "null": True},
                "AlcaSkims": {"type": makeList, "optional": False},
                "DqmSequences": {"type": makeList, "optional": False},
                "BlockCloseDelay": {"type": int, "optional": False,
                                    "validate": lambda x: x > 0},
                }
    baseArgs.update(specArgs)
    StdBase.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __init__(self, app, config, mount):
    """
    Initialize directories, template maps, CherryPy config and
    connections to the reqmgr2 / wmstats / LogDB services.

    :param app: application object (unused here)
    :param config: service configuration (object with .dictionary_() or dict)
    :param mount: mount point (unused here)
    """
    self.base = config.base
    self.rootdir = '/'.join(WMCore.__file__.split('/')[:-1])
    # NOTE(review): if config is a truthy dict, neither branch below runs
    # and web_config stays unbound (NameError) — confirm intended inputs.
    if config and not isinstance(config, dict):
        web_config = config.dictionary_()
    if not config:
        web_config = {'base': self.base}
    TemplatedPage.__init__(self, web_config)
    # resolve asset directories: environment overrides cwd defaults,
    # web_config overrides the environment
    imgdir = os.environ.get('RM_IMAGESPATH', os.getcwd() + '/images')
    self.imgdir = web_config.get('imgdir', imgdir)
    cssdir = os.environ.get('RM_CSSPATH', os.getcwd() + '/css')
    self.cssdir = web_config.get('cssdir', cssdir)
    jsdir = os.environ.get('RM_JSPATH', os.getcwd() + '/js')
    self.jsdir = web_config.get('jsdir', jsdir)
    spdir = os.environ.get('RM_SPECPATH', os.getcwd() + '/specs')
    self.spdir = web_config.get('spdir', spdir)
    # read scripts area and initialize data-ops scripts
    self.sdir = os.environ.get('RM_SCRIPTS', os.getcwd() + '/scripts')
    self.sdir = web_config.get('sdir', self.sdir)
    self.sdict_thr = web_config.get('sdict_thr',
                                    600)  # put reasonable 10 min interval
    self.sdict = {'ts': time.time()}  # placeholder for data-ops scripts
    self.update_scripts(force=True)

    # To be filled at run time
    self.cssmap = {}
    self.jsmap = {}
    self.imgmap = {}
    self.yuimap = {}

    std_specs_dir = os.path.join(self.rootdir, 'WMSpec/StdSpecs')
    self.std_specs = spec_list(std_specs_dir)
    self.std_specs.sort()

    # Update CherryPy configuration
    mime_types = ['text/css']
    mime_types += ['application/javascript', 'text/javascript',
                   'application/x-javascript', 'text/x-javascript']
    cherryconf.update({'tools.encode.on': True,
                       'tools.gzip.on': True,
                       'tools.gzip.mime_types': mime_types,
                       })
    self._cache = {}

    # initialize access to reqmgr2 APIs
    self.reqmgr_url = config.reqmgr.reqmgr2_url
    self.reqmgr = ReqMgr(self.reqmgr_url)
    # only gets current view (This might cause to reponse time much longer,
    # If upto date view is not needed overwrite Fale)
    self.reqmgr._noStale = True

    # get fields which we'll use in templates
    cdict = config.reqmgr.dictionary_()
    self.couch_url = cdict.get('couch_host', '')
    self.couch_dbname = cdict.get('couch_reqmgr_db', '')
    self.couch_wdbname = cdict.get('couch_workload_summary_db', '')
    self.acdc_url = cdict.get('acdc_host', '')
    self.acdc_dbname = cdict.get('acdc_db', '')
    self.configcache_url = cdict.get('couch_config_cache_url', self.couch_url)
    self.dbs_url = cdict.get('dbs_url', '')
    self.dqm_url = cdict.get('dqm_url', '')
    self.sw_ver = cdict.get('default_sw_version', 'CMSSW_7_6_1')
    self.sw_arch = cdict.get('default_sw_scramarch', 'slc6_amd64_gcc493')

    # LogDB holder
    centralurl = cdict.get("central_logdb_url", "")
    identifier = cdict.get("log_reporter", "reqmgr2")
    self.logdb = LogDB(centralurl, identifier)

    # local team cache which will request data from wmstats
    base, uri = self.reqmgr_url.split('://')
    base_url = '%s://%s' % (base, uri.split('/')[0])
    self.wmstatsurl = cdict.get('wmstats_url', '%s/wmstatsserver' % base_url)
    if not self.wmstatsurl:
        raise Exception('ReqMgr2 configuration file does not provide wmstats url')
    self.team_cache = []

    # fetch assignment arguments specification from StdBase
    self.assignArgs = StdBase().getWorkloadAssignArgs()
    # keep only the default value of each assignment argument
    self.assignArgs = {key: val['default'] for key, val in self.assignArgs.items()}