def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"RequestType": {"default": "MonteCarloFromGEN", "optional": True,
                                "attr": "requestType"},
                "PrimaryDataset": {"default": None, "type": str,
                                   "optional": True, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": True},
                "ConfigCacheUrl": {"default": None, "type": str,
                                   "optional": True, "validate": None,
                                   "attr": "configCacheUrl", "null": False},
                "ConfigCacheID": {"default": None, "type": str,
                                  "optional": False, "validate": None,
                                  "attr": "configCacheID", "null": True},
                "MCPileup": {"default": None, "type": str,
                             "optional": True, "validate": dataset,
                             "attr": "mcPileup", "null": True},
                "DataPileup": {"default": None, "type": str,
                               "optional": True, "validate": dataset,
                               "attr": "dataPileup", "null": True},
                "DeterministicPileup": {"default": False, "type": strToBool,
                                        "optional": True, "validate": None,
                                        "attr": "deterministicPileup", "null": False}}
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
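# Illustrative sketch (not part of the spec classes in this section): how an
# argument definition of the shape returned by getWorkloadArguments() -- keys
# such as "default", "type", "optional", "validate", "attr" and "null" --
# could be applied to a raw request dictionary. The helper name and behaviour
# below are assumptions for illustration only, not the WMCore implementation.
def applyArgumentDefinitions(argumentDefinitions, requestArgs):
    """Return {attr: value} after defaulting, casting and validating requestArgs."""
    resolved = {}
    for name, definition in argumentDefinitions.items():
        if name not in requestArgs:
            if not definition.get("optional", True):
                raise ValueError("Missing required argument: %s" % name)
            value = definition.get("default")
        else:
            value = requestArgs[name]
            castType = definition.get("type")
            if castType is not None and value is not None:
                value = castType(value)
        if value is None and not definition.get("null", True):
            raise ValueError("Argument %s must not be None" % name)
        validator = definition.get("validate")
        if value is not None and validator is not None and not validator(value):
            raise ValueError("Argument %s failed validation" % name)
        resolved[definition.get("attr", name)] = value
    return resolved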
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReDigi workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    # Adjust the sizePerEvent, timePerEvent and memory for step two and three
    if self.stepTwoTimePerEvent is None:
        self.stepTwoTimePerEvent = self.timePerEvent
    if self.stepTwoSizePerEvent is None:
        self.stepTwoSizePerEvent = self.sizePerEvent
    if self.stepTwoMemory is None:
        self.stepTwoMemory = self.memory
    if self.stepThreeTimePerEvent is None:
        self.stepThreeTimePerEvent = self.timePerEvent
    if self.stepThreeSizePerEvent is None:
        self.stepThreeSizePerEvent = self.sizePerEvent
    if self.stepThreeMemory is None:
        self.stepThreeMemory = self.memory

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a DQMHarvest workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    self.workload = self.createWorkload()
    self.workload.setDashboardActivity("harvesting")

    splitArgs = {"runs_per_job": 1}
    if self.dqmHarvestUnit == "multiRun":
        # then it should result in a single job in the end, very high number of runs
        splitArgs['runs_per_job'] = 999999
    self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

    # also creates the logCollect job by default
    self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                           periodic_harvest_interval=self.periodicHarvestInterval,
                           dqmHarvestUnit=self.dqmHarvestUnit)

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    self.workload.setTaskPropertiesFromWorkload()

    self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

    return self.workload
def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"RequestType": {"default": "MonteCarloFromGEN", "optional": True,
                                "attr": "requestType"},
                "PrimaryDataset": {"default": None, "type": str,
                                   "optional": True, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": False},
                "ConfigCacheUrl": {"default": None, "type": str,
                                   "optional": True, "validate": None,
                                   "attr": "configCacheUrl", "null": False},
                "ConfigCacheID": {"default": None, "type": str,
                                  "optional": False, "validate": None,
                                  "attr": "configCacheID", "null": True},
                "MCPileup": {"default": None, "type": str,
                             "optional": True, "validate": dataset,
                             "attr": "mcPileup", "null": True},
                "DataPileup": {"default": None, "type": str,
                               "optional": True, "validate": dataset,
                               "attr": "dataPileup", "null": True},
                "DeterministicPileup": {"default": False, "type": strToBool,
                                        "optional": True, "validate": None,
                                        "attr": "deterministicPileup", "null": False}}
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Arrange the skims in a skimConfig object (i.e. a list of skim configurations)
    self.skimConfigs = []
    skimIndex = 1
    while "SkimName%s" % skimIndex in arguments:
        skimConfig = {}
        skimConfig["SkimName"] = arguments["SkimName%s" % skimIndex]
        skimConfig["SkimInput"] = arguments["SkimInput%s" % skimIndex]
        skimConfig["ConfigCacheID"] = arguments["Skim%sConfigCacheID" % skimIndex]
        skimConfig["TimePerEvent"] = float(arguments.get("SkimTimePerEvent%s" % skimIndex, self.timePerEvent))
        skimConfig["SizePerEvent"] = float(arguments.get("SkimSizePerEvent%s" % skimIndex, self.sizePerEvent))
        skimConfig["Memory"] = float(arguments.get("SkimMemory%s" % skimIndex, self.memory))
        skimConfig["SkimJobSplitAlgo"] = arguments.get("SkimSplittingAlgo%s" % skimIndex, "FileBased")
        skimConfig["SkimJobSplitArgs"] = {"include_parents": True}
        if skimConfig["SkimJobSplitAlgo"] == "FileBased":
            skimConfig["SkimJobSplitArgs"]["files_per_job"] = int(arguments.get("SkimFilesPerJob%s" % skimIndex, 1))
        elif skimConfig["SkimJobSplitAlgo"] in ["EventBased", "EventAwareLumiBased"]:
            skimConfig["SkimJobSplitArgs"]["events_per_job"] = int(arguments.get("SkimEventsPerJob%s" % skimIndex,
                                                                                 int((8.0 * 3600.0) / skimConfig["TimePerEvent"])))
            if skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased":
                skimConfig["SkimJobSplitArgs"]["max_events_per_lumi"] = 20000
        elif skimConfig["SkimJobSplitAlgo"] == "LumiBased":
            skimConfig["SkimJobSplitArgs"]["lumis_per_job"] = int(arguments.get("SkimLumisPerJob%s" % skimIndex, 8))
        self.skimConfigs.append(skimConfig)
        skimIndex += 1

    return self.buildWorkload()
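# Illustrative sketch: the loop above reads skims from numbered request
# arguments (SkimName1, SkimInput1, Skim1ConfigCacheID, ...), stopping at the
# first missing index. The skim names, output module names, config-cache
# hashes and numbers below are made-up placeholder values, shown only to make
# the naming convention concrete.
exampleSkimArguments = {
    "SkimName1": "SingleMuSkim",
    "SkimInput1": "RECOoutput",
    "Skim1ConfigCacheID": "0123456789abcdef0123456789abcdef",  # placeholder hash
    "SkimSplittingAlgo1": "EventAwareLumiBased",
    "SkimEventsPerJob1": 10000,
    "SkimName2": "DoubleMuSkim",
    "SkimInput2": "RECOoutput",
    "Skim2ConfigCacheID": "fedcba9876543210fedcba9876543210",  # placeholder hash
    # no "SkimName3" key, so the while-loop stops after the second skim
}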
def getWorkloadCreateArgs():
    baseArgs = DataProcessing.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "ReDigi", "optional": False},
                "StepOneOutputModuleName": {"null": True},
                "StepTwoOutputModuleName": {"null": True},
                "ConfigCacheID": {"optional": True, "null": True},
                "StepOneConfigCacheID": {"optional": False, "null": True},
                "StepTwoConfigCacheID": {"null": True},
                "StepThreeConfigCacheID": {"null": True},
                "KeepStepOneOutput": {"default": True, "type": strToBool, "null": False},
                "KeepStepTwoOutput": {"default": True, "type": strToBool, "null": False},
                "StepTwoTimePerEvent": {"type": float, "null": True, "validate": lambda x: x > 0},
                "StepThreeTimePerEvent": {"type": float, "null": True, "validate": lambda x: x > 0},
                "StepTwoSizePerEvent": {"type": float, "null": True, "validate": lambda x: x > 0},
                "StepThreeSizePerEvent": {"type": float, "null": True, "validate": lambda x: x > 0},
                "StepTwoMemory": {"type": float, "null": True, "validate": lambda x: x > 0},
                "StepThreeMemory": {"type": float, "null": True, "validate": lambda x: x > 0},
                "MCPileup": {"validate": dataset, "attr": "mcPileup", "null": True},
                "DataPileup": {"null": True, "validate": dataset},
                "DeterministicPileup": {"default": False, "type": strToBool, "null": False}}
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)
    return self.buildWorkload()
def getWorkloadAssignArgs():
    baseArgs = DataProcessing.getWorkloadAssignArgs()
    specArgs = {
        "Override": {"default": {"eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/PromptReco"},
                     "type": dict},
    }
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"RequestType": {"default": "ReDigi", "optional": True,
                                "attr": "requestType"},
                "StepOneOutputModuleName": {"default": None, "type": str,
                                            "optional": True, "validate": None,
                                            "attr": "stepOneOutputModuleName", "null": True},
                "StepTwoOutputModuleName": {"default": None, "type": str,
                                            "optional": True, "validate": None,
                                            "attr": "stepTwoOutputModuleName", "null": True},
                "ConfigCacheID": {"default": None, "optional": True, "null": True},
                "StepOneConfigCacheID": {"default": None, "type": str,
                                         "optional": False, "validate": None,
                                         "attr": "stepOneConfigCacheID", "null": True},
                "StepTwoConfigCacheID": {"default": None, "type": str,
                                         "optional": True, "validate": None,
                                         "attr": "stepTwoConfigCacheID", "null": True},
                "StepThreeConfigCacheID": {"default": None, "type": str,
                                           "optional": True, "validate": None,
                                           "attr": "stepThreeConfigCacheID", "null": True},
                "KeepStepOneOutput": {"default": True, "type": strToBool,
                                      "optional": True, "validate": None,
                                      "attr": "keepStepOneOutput", "null": False},
                "KeepStepTwoOutput": {"default": True, "type": strToBool,
                                      "optional": True, "validate": None,
                                      "attr": "keepStepTwoOutput", "null": False},
                "StepTwoTimePerEvent": {"default": 1, "type": float,
                                        "optional": True, "validate": lambda x: x > 0,
                                        "attr": "stepTwoTimePerEvent", "null": False},
                "StepThreeTimePerEvent": {"default": 1, "type": float,
                                          "optional": True, "validate": lambda x: x > 0,
                                          "attr": "stepThreeTimePerEvent", "null": False},
                "StepTwoSizePerEvent": {"default": None, "type": float,
                                        "optional": True, "validate": lambda x: x > 0,
                                        "attr": "stepTwoSizePerEvent", "null": True},
                "StepThreeSizePerEvent": {"default": None, "type": float,
                                          "optional": True, "validate": lambda x: x > 0,
                                          "attr": "stepThreeSizePerEvent", "null": True},
                "StepTwoMemory": {"default": None, "type": float,
                                  "optional": True, "validate": lambda x: x > 0,
                                  "attr": "stepTwoMemory", "null": True},
                "StepThreeMemory": {"default": None, "type": float,
                                    "optional": True, "validate": lambda x: x > 0,
                                    "attr": "stepThreeMemory", "null": True},
                "MCPileup": {"default": None, "type": str,
                             "optional": True, "validate": dataset,
                             "attr": "mcPileup", "null": True},
                "DataPileup": {"default": None, "type": str,
                               "optional": True, "validate": dataset,
                               "attr": "dataPileup", "null": True},
                "DeterministicPileup": {"default": False, "type": strToBool,
                                        "optional": True, "validate": None,
                                        "attr": "deterministicPileup", "null": False}}
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    baseArgs = DataProcessing.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "DQMHarvest", "optional": True},
                "ConfigCacheID": {"optional": True, "null": True},
                "DQMConfigCacheID": {"optional": False, "attr": "dqmConfigCacheID"},
                "DQMUploadUrl": {"optional": False, "attr": "dqmUploadUrl"},
               }
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def getWorkloadCreateArgs():
    baseArgs = DataProcessing.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "ReReco", "optional": False},
                "TransientOutputModules": {"default": [], "type": makeList,
                                           "attr": "transientModules", "null": False}
               }
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def validateSchema(self, schema):
    """
    _validateSchema_

    Standard DataProcessing schema validation.
    """
    DataProcessing.validateSchema(self, schema)
    self.validateConfigCacheExists(configID=schema["DQMConfigCacheID"],
                                   configCacheUrl=schema['ConfigCacheUrl'],
                                   couchDBName=schema["CouchDBName"],
                                   getOutputModules=False)
def validateSchema(self, schema):
    """
    _validateSchema_

    Check for required fields, and some skim facts
    """
    DataProcessing.validateSchema(self, schema)
    mainOutputModules = list(self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                                            configCacheUrl=schema['ConfigCacheUrl'],
                                                            couchDBName=schema["CouchDBName"],
                                                            getOutputModules=True))

    # Skim facts have to be validated outside the usual master validation
    skimSchema = {k: v for (k, v) in viewitems(schema) if k.startswith("Skim")}
    skimArguments = self.getSkimArguments()
    skimIndex = 1
    skimInputs = set()
    while "SkimName%s" % skimIndex in schema:
        instanceArguments = {}
        for argument in skimArguments:
            realArg = argument.replace("#N", str(skimIndex))
            instanceArguments[realArg] = skimArguments[argument]
        try:
            validateArgumentsCreate(skimSchema, instanceArguments)
        except Exception as ex:
            self.raiseValidationException(str(ex))

        self.validateConfigCacheExists(configID=schema["Skim%sConfigCacheID" % skimIndex],
                                       configCacheUrl=schema['ConfigCacheUrl'],
                                       couchDBName=schema["CouchDBName"],
                                       getOutputModules=False)
        if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
            error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
            self.raiseValidationException(msg=error)
        skimInputs.add(schema["SkimInput%s" % skimIndex])
        skimIndex += 1

    # Validate that the transient output modules are used in a skim task
    if "TransientOutputModules" in schema:
        diffSet = set(schema["TransientOutputModules"]) - skimInputs
        if diffSet:
            self.raiseValidationException(msg="A transient output module was specified but no skim was defined for it")
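# Illustrative sketch: the skim validation above works with templated argument
# names containing the literal token "#N" (e.g. "Skim#NConfigCacheID"), which
# is replaced by the skim index before validation. A minimal example of that
# substitution, using made-up template keys:
skimArgumentTemplates = {"SkimName#N": {"optional": False},
                         "Skim#NConfigCacheID": {"optional": False}}
skimIndex = 2
instanceArguments = {name.replace("#N", str(skimIndex)): definition
                     for name, definition in skimArgumentTemplates.items()}
# instanceArguments now has the keys "SkimName2" and "Skim2ConfigCacheID"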
def validateSchema(self, schema):
    """
    _validateSchema_

    Standard StdBase schema validation, plus verification of the ConfigCacheID
    """
    DataProcessing.validateSchema(self, schema)
    couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
    self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                   couchURL=couchUrl,
                                   couchDBName=schema["CouchDBName"])
    return
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a MonteCarloFromGEN workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # Transform the pileup as required by the CMSSW step
    self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
    # Adjust the pileup splitting
    self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

    return self.buildWorkload()
def __init__(self):
    """
    __init__

    Setup parameters that will be later overwritten in the call,
    otherwise pylint will complain about them.
    """
    DataProcessing.__init__(self)
    self.stepTwoMemory = None
    self.stepTwoSizePerEvent = None
    self.stepTwoTimePerEvent = None
    self.stepThreeMemory = None
    self.stepThreeSizePerEvent = None
    self.stepThreeTimePerEvent = None
def validateSchema(self, schema):
    """
    _validateSchema_

    Standard StdBase schema validation, plus verification of the ConfigCacheID
    """
    DataProcessing.validateSchema(self, schema)
    self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                   configCacheUrl=schema['ConfigCacheUrl'],
                                   couchDBName=schema["CouchDBName"],
                                   getOutputModules=False)
    return
def validateSchema(self, schema):
    """
    _validateSchema_

    Check for required fields, and some skim facts
    """
    DataProcessing.validateSchema(self, schema)
    couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
    mainOutputModules = self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                                       couchURL=couchUrl,
                                                       couchDBName=schema["CouchDBName"],
                                                       getOutputModules=True).keys()

    # Skim facts have to be validated outside the usual master validation
    skimArguments = self.getSkimArguments()
    skimIndex = 1
    skimInputs = set()
    while "SkimName%s" % skimIndex in schema:
        instanceArguments = {}
        for argument in skimArguments.keys():
            realArg = argument.replace("#N", str(skimIndex))
            instanceArguments[realArg] = skimArguments[argument]
        msg = validateArgumentsCreate(schema, instanceArguments)
        if msg is not None:
            self.raiseValidationException(msg)

        self.validateConfigCacheExists(configID=schema["Skim%sConfigCacheID" % skimIndex],
                                       couchURL=couchUrl,
                                       couchDBName=schema["CouchDBName"])
        if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
            error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
            self.raiseValidationException(msg=error)
        skimInputs.add(schema["SkimInput%s" % skimIndex])
        skimIndex += 1

    # Validate that the transient output modules are used in a skim task
    if "TransientOutputModules" in schema:
        diffSet = set(schema["TransientOutputModules"]) - skimInputs
        if diffSet:
            self.raiseValidationException(msg="A transient output module was specified but no skim was defined for it")
def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"PrimaryDataset": {"default": None, "type": str,
                                   "optional": True, "validate": primdataset,
                                   "attr": "inputPrimaryDataset", "null": False},
                "ConfigCacheID": {"default": None, "type": str,
                                  "optional": False, "validate": None,
                                  "attr": "configCacheID", "null": False}}
    baseArgs.update(specArgs)
    return baseArgs
def validateSchema(self, schema):
    """
    _validateSchema_

    Check for required fields, and some skim facts
    """
    DataProcessing.validateSchema(self, schema)
    mainOutputModules = self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                                       configCacheUrl=schema['ConfigCacheUrl'],
                                                       couchDBName=schema["CouchDBName"],
                                                       getOutputModules=True).keys()

    # Skim facts have to be validated outside the usual master validation
    skimSchema = {k: v for (k, v) in schema.iteritems() if k.startswith("Skim")}
    skimArguments = self.getSkimArguments()
    skimIndex = 1
    skimInputs = set()
    while "SkimName%s" % skimIndex in schema:
        instanceArguments = {}
        for argument in skimArguments:
            realArg = argument.replace("#N", str(skimIndex))
            instanceArguments[realArg] = skimArguments[argument]
        try:
            validateArgumentsCreate(skimSchema, instanceArguments)
        except Exception as ex:
            self.raiseValidationException(str(ex))

        self.validateConfigCacheExists(configID=schema["Skim%sConfigCacheID" % skimIndex],
                                       configCacheUrl=schema['ConfigCacheUrl'],
                                       couchDBName=schema["CouchDBName"],
                                       getOutputModules=False)
        if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
            error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
            self.raiseValidationException(msg=error)
        skimInputs.add(schema["SkimInput%s" % skimIndex])
        skimIndex += 1

    # Validate that the transient output modules are used in a skim task
    if "TransientOutputModules" in schema:
        diffSet = set(schema["TransientOutputModules"]) - skimInputs
        if diffSet:
            self.raiseValidationException(msg="A transient output module was specified but no skim was defined for it")
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
            self.procJobSplitArgs["allowCreationFailure"] = False
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
            self.skimJobSplitArgs["allowCreationFailure"] = False
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1,
                                           "include_parents": True})

    return self.buildWorkload()
def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"TransientOutputModules": {"default": [], "type": makeList,
                                           "optional": True, "validate": None,
                                           "attr": "transientModules", "null": False},
                "ConfigCacheID": {"default": None, "type": str,
                                  "optional": False, "validate": None,
                                  "attr": "configCacheID", "null": False}}
    baseArgs.update(specArgs)
    return baseArgs
def validateSchema(self, schema):
    """
    _validateSchema_

    Check for required fields, and some skim facts
    """
    DataProcessing.validateSchema(self, schema)
    couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
    mainOutputModules = self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                                       couchURL=couchUrl,
                                                       couchDBName=schema["CouchDBName"],
                                                       getOutputModules=True).keys()

    # Skim facts have to be validated outside the usual master validation
    skimArguments = self.getSkimArguments()
    skimIndex = 1
    skimInputs = set()
    while "SkimName%s" % skimIndex in schema:
        instanceArguments = {}
        for argument in skimArguments.keys():
            realArg = argument.replace("#N", str(skimIndex))
            instanceArguments[realArg] = skimArguments[argument]
        msg = validateArguments(schema, instanceArguments)
        if msg is not None:
            self.raiseValidationException(msg)

        self.validateConfigCacheExists(configID=schema["Skim%sConfigCacheID" % skimIndex],
                                       couchURL=couchUrl,
                                       couchDBName=schema["CouchDBName"])
        if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
            error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
            self.raiseValidationException(msg=error)
        skimInputs.add(schema["SkimInput%s" % skimIndex])
        skimIndex += 1

    # Validate that the transient output modules are used in a skim task
    if "TransientOutputModules" in schema:
        diffSet = set(schema["TransientOutputModules"]) - skimInputs
        if diffSet:
            self.raiseValidationException(msg="A transient output module was specified but no skim was defined for it")
def getWorkloadArguments():
    baseArgs = DataProcessing.getWorkloadArguments()
    specArgs = {"StepOneOutputModuleName": {"default": None, "type": str,
                                            "optional": True, "validate": None,
                                            "attr": "stepOneOutputModuleName", "null": False},
                "StepTwoOutputModuleName": {"default": None, "type": str,
                                            "optional": True, "validate": None,
                                            "attr": "stepTwoOutputModuleName", "null": False},
                "StepOneConfigCacheID": {"default": None, "type": str,
                                         "optional": False, "validate": None,
                                         "attr": "stepOneConfigCacheID", "null": False},
                "StepTwoConfigCacheID": {"default": None, "type": str,
                                         "optional": True, "validate": None,
                                         "attr": "stepTwoConfigCacheID", "null": False},
                "StepThreeConfigCacheID": {"default": None, "type": str,
                                           "optional": True, "validate": None,
                                           "attr": "stepThreeConfigCacheID", "null": False},
                "KeepStepOneOutput": {"default": True, "type": strToBool,
                                      "optional": True, "validate": None,
                                      "attr": "keepStepOneOutput", "null": False},
                "KeepStepTwoOutput": {"default": True, "type": strToBool,
                                      "optional": True, "validate": None,
                                      "attr": "keepStepTwoOutput", "null": False},
                "StepTwoTimePerEvent": {"default": None, "type": float,
                                        "optional": True, "validate": lambda x: x > 0,
                                        "attr": "stepTwoTimePerEvent", "null": False},
                "StepThreeTimePerEvent": {"default": None, "type": float,
                                          "optional": True, "validate": lambda x: x > 0,
                                          "attr": "stepThreeTimePerEvent", "null": False},
                "StepTwoSizePerEvent": {"default": None, "type": float,
                                        "optional": True, "validate": lambda x: x > 0,
                                        "attr": "stepTwoSizePerEvent", "null": False},
                "StepThreeSizePerEvent": {"default": None, "type": float,
                                          "optional": True, "validate": lambda x: x > 0,
                                          "attr": "stepThreeSizePerEvent", "null": False},
                "StepTwoMemory": {"default": None, "type": float,
                                  "optional": True, "validate": lambda x: x > 0,
                                  "attr": "stepTwoMemory", "null": False},
                "StepThreeMemory": {"default": None, "type": float,
                                    "optional": True, "validate": lambda x: x > 0,
                                    "attr": "stepThreeMemory", "null": False},
                "MCPileup": {"default": None, "type": str,
                             "optional": True, "validate": dataset,
                             "attr": "mcPileup", "null": False},
                "DataPileup": {"default": None, "type": str,
                               "optional": True, "validate": dataset,
                               "attr": "dataPileup", "null": False},
                "DeterministicPileup": {"default": False, "type": strToBool,
                                        "optional": True, "validate": None,
                                        "attr": "deterministicPileup", "null": False}}
    baseArgs.update(specArgs)
    return baseArgs
def getWorkloadCreateArgs():
    baseArgs = DataProcessing.getWorkloadCreateArgs()
    specArgs = {"RequestType": {"default": "PromptReco", "optional": True},
                "ConfigCacheID": {"optional": True, "null": True},
                "Scenario": {"default": None, "optional": False,
                             "attr": "procScenario", "null": False},
                "ProcessingString": {"default": "", "validate": procstringT0},
                "WriteTiers": {"default": ["RECO", "AOD", "DQM", "ALCARECO"],
                               "type": makeList, "optional": False, "null": False},
                "AlcaSkims": {"default": ["TkAlCosmics0T", "MuAlGlobalCosmics", "HcalCalHOCosmics"],
                              "type": makeList, "optional": False, "null": False},
                "PhysicsSkims": {"default": [], "type": makeList,
                                 "optional": True, "null": False},
                "InitCommand": {"default": None, "optional": True, "null": True},
                "EnvPath": {"default": None, "optional": True, "null": True},
                "BinPath": {"default": None, "optional": True, "null": True},
                "DoLogCollect": {"default": True, "type": strToBool,
                                 "optional": True, "null": False},
                "SplittingAlgo": {"default": "EventAwareLumiBased", "null": False,
                                  "validate": lambda x: x in ["EventBased", "LumiBased",
                                                              "EventAwareLumiBased", "FileBased"],
                                  "attr": "procJobSplitAlgo"},
                "EventsPerJob": {"default": 500, "type": int,
                                 "validate": lambda x: x > 0, "null": False},
                "SkimSplittingAlgo": {"default": "FileBased", "null": False,
                                      "validate": lambda x: x in ["EventBased", "LumiBased",
                                                                  "EventAwareLumiBased", "FileBased"],
                                      "attr": "skimJobSplitAlgo"},
                "SkimEventsPerJob": {"default": 500, "type": int,
                                     "validate": lambda x: x > 0, "null": False},
                "SkimLumisPerJob": {"default": 8, "type": int,
                                    "validate": lambda x: x > 0, "null": False},
                "SkimFilesPerJob": {"default": 1, "type": int,
                                    "validate": lambda x: x > 0, "null": False},
               }
    baseArgs.update(specArgs)
    DataProcessing.setDefaultArgumentsProperty(baseArgs)
    return baseArgs
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["max_events_per_lumi"] = 100000
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["max_events_per_lumi"] = 20000
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1,
                                           "include_parents": True})

    return self.buildWorkload()
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a ReReco workload with the given parameters.
    """
    DataProcessing.__call__(self, workloadName, arguments)

    # These are mostly place holders because the job splitting algo and
    # parameters will be updated after the workflow has been created.
    self.procJobSplitArgs = {}
    if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
        if self.procJobSplitAlgo == "EventAwareLumiBased":
            self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
    elif self.procJobSplitAlgo == "LumiBased":
        self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.procJobSplitAlgo == "FileBased":
        self.procJobSplitArgs["files_per_job"] = self.filesPerJob

    self.skimJobSplitArgs = {}
    if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.skimJobSplitAlgo == "EventAwareLumiBased":
            self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
        self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
    elif self.skimJobSplitAlgo == "LumiBased":
        self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
    elif self.skimJobSplitAlgo == "FileBased":
        self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
    self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                          {"files_per_job": 1,
                                           "include_parents": True})

    return self.buildWorkload()
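# Illustrative sketch: the event-based splitting above targets jobs of roughly
# eight hours of wall-clock time by dividing that target by the per-event time
# estimate. With a hypothetical timePerEvent of 12 seconds (example value, not
# from the code above):
timePerEvent = 12.0                                # seconds per event (example value)
eventsPerJob = int((8.0 * 3600.0) / timePerEvent)  # 28800 / 12 = 2400 events per job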