Example #1
 def getWorkloadArguments():
     baseArgs = DataProcessing.getWorkloadArguments()
     specArgs = {"RequestType": {"default": "MonteCarloFromGEN", "optional": True,
                                 "attr": "requestType"},
                 "PrimaryDataset": {"default": None, "type": str,
                                    "optional": True, "validate": primdataset,
                                    "attr": "inputPrimaryDataset", "null": True},
                 "ConfigCacheUrl": {"default": None, "type": str,
                                    "optional": True, "validate": None,
                                    "attr": "configCacheUrl", "null": False},
                 "ConfigCacheID": {"default": None, "type": str,
                                   "optional": False, "validate": None,
                                   "attr": "configCacheID", "null": True},
                 "MCPileup": {"default": None, "type": str,
                              "optional": True, "validate": dataset,
                              "attr": "mcPileup", "null": True},
                 "DataPileup": {"default": None, "type": str,
                                "optional": True, "validate": dataset,
                                "attr": "dataPileup", "null": True},
                 "DeterministicPileup": {"default": False, "type": strToBool,
                                         "optional": True, "validate": None,
                                         "attr": "deterministicPileup", "null": False}}
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
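For context, each entry in the specification dictionary above describes one request argument: its default value, expected type, whether it is optional, an optional validation callable, the attribute name it is stored under on the workload object, and whether None is accepted. The helper below is a minimal, hypothetical sketch of how such a dictionary could be applied to a raw request dictionary; it is not WMCore's actual validation machinery (that lives in StdBase and WMWorkloadTools), and the name applyArgumentSpec is made up for illustration.

# Hypothetical helper, for illustration only: applies defaults, type
# coercion and validation from a spec dict shaped like the one above.
def applyArgumentSpec(spec, rawArgs):
    applied = {}
    for name, rules in spec.items():
        if name in rawArgs:
            value = rawArgs[name]
            castType = rules.get("type")
            if castType is not None and value is not None:
                value = castType(value)
        elif rules.get("optional", True):
            value = rules.get("default")
        else:
            raise ValueError("Missing required argument: %s" % name)
        if value is None and not rules.get("null", True):
            raise ValueError("Argument %s must not be None" % name)
        validate = rules.get("validate")
        if validate is not None and value is not None and not validate(value):
            raise ValueError("Argument %s failed validation" % name)
        applied[rules.get("attr", name)] = value
    return applied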
Example #2
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReDigi workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

        # Adjust the pileup splitting
        self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

        # Adjust the sizePerEvent, timePerEvent and memory for step two and three
        if self.stepTwoTimePerEvent is None:
            self.stepTwoTimePerEvent = self.timePerEvent
        if self.stepTwoSizePerEvent is None:
            self.stepTwoSizePerEvent = self.sizePerEvent
        if self.stepTwoMemory is None:
            self.stepTwoMemory = self.memory
        if self.stepThreeTimePerEvent is None:
            self.stepThreeTimePerEvent = self.timePerEvent
        if self.stepThreeSizePerEvent is None:
            self.stepThreeSizePerEvent = self.sizePerEvent
        if self.stepThreeMemory is None:
            self.stepThreeMemory = self.memory


        return self.buildWorkload()
Example #3
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DQMHarvest workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        self.workload = self.createWorkload()

        self.workload.setDashboardActivity("harvesting")

        splitArgs = {"runs_per_job": 1}
        if self.dqmHarvestUnit == "multiRun":
            # then it should result in a single job in the end, very high number of runs
            splitArgs['runs_per_job'] = 999999
        self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

        # also creates the logCollect job by default
        self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                               periodic_harvest_interval=self.periodicHarvestInterval,
                               dqmHarvestUnit=self.dqmHarvestUnit)

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        self.workload.setTaskPropertiesFromWorkload()
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        return self.workload
Example #4
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReDigi workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

        # Adjust the pileup splitting
        self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

        # Adjust the sizePerEvent, timePerEvent and memory for step two and three
        if self.stepTwoTimePerEvent is None:
            self.stepTwoTimePerEvent = self.timePerEvent
        if self.stepTwoSizePerEvent is None:
            self.stepTwoSizePerEvent = self.sizePerEvent
        if self.stepTwoMemory is None:
            self.stepTwoMemory = self.memory
        if self.stepThreeTimePerEvent is None:
            self.stepThreeTimePerEvent = self.timePerEvent
        if self.stepThreeSizePerEvent is None:
            self.stepThreeSizePerEvent = self.sizePerEvent
        if self.stepThreeMemory is None:
            self.stepThreeMemory = self.memory


        return self.buildWorkload()
Example #5
 def getWorkloadArguments():
     baseArgs = DataProcessing.getWorkloadArguments()
     specArgs = {
         "RequestType": {
             "default": "MonteCarloFromGEN",
             "optional": True,
             "attr": "requestType"
         },
         "PrimaryDataset": {
             "default": None,
             "type": str,
             "optional": True,
             "validate": primdataset,
             "attr": "inputPrimaryDataset",
             "null": False
         },
         "ConfigCacheUrl": {
             "default": None,
             "type": str,
             "optional": True,
             "validate": None,
             "attr": "configCacheUrl",
             "null": False
         },
         "ConfigCacheID": {
             "default": None,
             "type": str,
             "optional": False,
             "validate": None,
             "attr": "configCacheID",
             "null": True
         },
         "MCPileup": {
             "default": None,
             "type": str,
             "optional": True,
             "validate": dataset,
             "attr": "mcPileup",
             "null": True
         },
         "DataPileup": {
             "default": None,
             "type": str,
             "optional": True,
             "validate": dataset,
             "attr": "dataPileup",
             "null": True
         },
         "DeterministicPileup": {
             "default": False,
             "type": strToBool,
             "optional": True,
             "validate": None,
             "attr": "deterministicPileup",
             "null": False
         }
     }
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #6
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Arrange the skims in a skimConfig object (i.e. a list of skim configurations)
        self.skimConfigs = []
        skimIndex = 1
        while "SkimName%s" % skimIndex in arguments:
            skimConfig = {}
            skimConfig["SkimName"] = arguments["SkimName%s" % skimIndex]
            skimConfig["SkimInput"] = arguments["SkimInput%s" % skimIndex]
            skimConfig["ConfigCacheID"] = arguments["Skim%sConfigCacheID" % skimIndex]
            skimConfig["TimePerEvent"] = float(arguments.get("SkimTimePerEvent%s" % skimIndex, self.timePerEvent))
            skimConfig["SizePerEvent"] = float(arguments.get("SkimSizePerEvent%s" % skimIndex, self.sizePerEvent))
            skimConfig["Memory"] = float(arguments.get("SkimMemory%s" % skimIndex, self.memory))
            skimConfig["SkimJobSplitAlgo"] = arguments.get("SkimSplittingAlgo%s" % skimIndex, "FileBased")
            skimConfig["SkimJobSplitArgs"] = {"include_parents" : True}
            if skimConfig["SkimJobSplitAlgo"] == "FileBased":
                skimConfig["SkimJobSplitArgs"]["files_per_job"] = int(arguments.get("SkimFilesPerJob%s" % skimIndex, 1))
            elif skimConfig["SkimJobSplitAlgo"] == "EventBased" or skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased":
                skimConfig["SkimJobSplitArgs"]["events_per_job"] = int(arguments.get("SkimEventsPerJob%s" % skimIndex, int((8.0 * 3600.0) / skimConfig["TimePerEvent"])))
                if skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased":
                    skimConfig["SkimJobSplitAlgo"]["max_events_per_lumi"] = 20000
            elif skimConfig["SkimJobSplitAlgo"] == "LumiBased":
                skimConfig["SkimJobSplitArgs"["lumis_per_job"]] = int(arguments.get("SkimLumisPerJob%s" % skimIndex, 8))
            self.skimConfigs.append(skimConfig)
            skimIndex += 1

        return self.buildWorkload()
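As a worked example of the events_per_job fallback in the loop above: the default targets roughly eight hours of processing per job, dividing an 8-hour budget by the per-event time. A minimal sketch, assuming a hypothetical TimePerEvent of 10 seconds:

# Default events_per_job: ~8 hours of wall time divided by seconds per event.
timePerEvent = 10.0  # hypothetical TimePerEvent, in seconds
eventsPerJob = int((8.0 * 3600.0) / timePerEvent)
print(eventsPerJob)  # 2880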
Example #7
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DQMHarvest workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        self.workload = self.createWorkload()

        self.workload.setDashboardActivity("harvesting")

        splitArgs = {"runs_per_job": 1}
        if self.dqmHarvestUnit == "multiRun":
            # then it should result in a single job in the end, very high number of runs
            splitArgs['runs_per_job'] = 999999
        self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

        # also creates the logCollect job by default
        self.addDQMHarvestTask(
            uploadProxy=self.dqmUploadProxy,
            periodic_harvest_interval=self.periodicHarvestInterval,
            dqmHarvestUnit=self.dqmHarvestUnit)

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        self.workload.setTaskPropertiesFromWorkload()
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        return self.workload
Example #8
 def getWorkloadCreateArgs():
     baseArgs = DataProcessing.getWorkloadCreateArgs()
     specArgs = {"RequestType": {"default": "ReDigi", "optional": False},
                 "StepOneOutputModuleName": {"null": True},
                 "StepTwoOutputModuleName": {"null": True},
                 "ConfigCacheID": {"optional": True, "null": True},
                 "StepOneConfigCacheID": {"optional": False, "null": True},
                 "StepTwoConfigCacheID": {"null": True},
                 "StepThreeConfigCacheID": {"null": True},
                 "KeepStepOneOutput": {"default": True, "type": strToBool, "null": False},
                 "KeepStepTwoOutput": {"default": True, "type": strToBool, "null": False},
                 "StepTwoTimePerEvent": {"type": float, "null": True,
                                         "validate": lambda x: x > 0},
                 "StepThreeTimePerEvent": {"type": float, "null": True,
                                           "validate": lambda x: x > 0},
                 "StepTwoSizePerEvent": {"type": float, "null": True,
                                         "validate": lambda x: x > 0},
                 "StepThreeSizePerEvent": {"type": float, "null": True,
                                           "validate": lambda x: x > 0},
                 "StepTwoMemory": {"type": float, "null": True,
                                   "validate": lambda x: x > 0},
                 "StepThreeMemory": {"type": float, "null": True,
                                     "validate": lambda x: x > 0},
                 "MCPileup": {"validate": dataset, "attr": "mcPileup", "null": True},
                 "DataPileup": {"null": True, "validate": dataset},
                 "DeterministicPileup": {"default": False, "type": strToBool, "null": False}}
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #9
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Arrange the skims in a skimConfig object (i.e. a list of skim configurations)
        self.skimConfigs = []
        skimIndex = 1
        while "SkimName%s" % skimIndex in arguments:
            skimConfig = {}
            skimConfig["SkimName"] = arguments["SkimName%s" % skimIndex]
            skimConfig["SkimInput"] = arguments["SkimInput%s" % skimIndex]
            skimConfig["ConfigCacheID"] = arguments["Skim%sConfigCacheID" % skimIndex]
            skimConfig["TimePerEvent"] = float(arguments.get("SkimTimePerEvent%s" % skimIndex, self.timePerEvent))
            skimConfig["SizePerEvent"] = float(arguments.get("SkimSizePerEvent%s" % skimIndex, self.sizePerEvent))
            skimConfig["Memory"] = float(arguments.get("SkimMemory%s" % skimIndex, self.memory))
            skimConfig["SkimJobSplitAlgo"] = arguments.get("SkimSplittingAlgo%s" % skimIndex, "FileBased")
            skimConfig["SkimJobSplitArgs"] = {"include_parents" : True}
            if skimConfig["SkimJobSplitAlgo"] == "FileBased":
                skimConfig["SkimJobSplitArgs"]["files_per_job"] = int(arguments.get("SkimFilesPerJob%s" % skimIndex, 1))
            elif skimConfig["SkimJobSplitAlgo"] == "EventBased" or skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased":
                skimConfig["SkimJobSplitArgs"]["events_per_job"] = int(arguments.get("SkimEventsPerJob%s" % skimIndex, int((8.0 * 3600.0) / skimConfig["TimePerEvent"])))
                if skimConfig["SkimJobSplitAlgo"] == "EventAwareLumiBased":
                    skimConfig["SkimJobSplitAlgo"]["max_events_per_lumi"] = 20000
            elif skimConfig["SkimJobSplitAlgo"] == "LumiBased":
                skimConfig["SkimJobSplitArgs"["lumis_per_job"]] = int(arguments.get("SkimLumisPerJob%s" % skimIndex, 8))
            self.skimConfigs.append(skimConfig)
            skimIndex += 1

        return self.buildWorkload()
Example #10
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)
        return self.buildWorkload()
Example #11
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)
        return self.buildWorkload()
Example #12
 def getWorkloadAssignArgs():
     baseArgs = DataProcessing.getWorkloadAssignArgs()
     specArgs = {
         "Override": {"default": {"eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/PromptReco"},
                      "type": dict},
         }
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #13
 def getWorkloadAssignArgs():
     baseArgs = DataProcessing.getWorkloadAssignArgs()
     specArgs = {
         "Override": {"default": {"eos-lfn-prefix": "root://eoscms.cern.ch//eos/cms/store/logs/prod/recent/PromptReco"},
                      "type": dict},
         }
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #14
 def getWorkloadArguments():
     baseArgs = DataProcessing.getWorkloadArguments()
     specArgs = {"RequestType" : {"default" : "ReDigi", "optional" : True,
                                   "attr" : "requestType"},
                 "StepOneOutputModuleName" : {"default" : None, "type" : str,
                                              "optional" : True, "validate" : None,
                                              "attr" : "stepOneOutputModuleName", "null" : True},
                 "StepTwoOutputModuleName" : {"default" : None, "type" : str,
                                              "optional" : True, "validate" : None,
                                              "attr" : "stepTwoOutputModuleName", "null" : True},
                 "ConfigCacheID": {"default" : None, "optional": True, "null": True},
                 "StepOneConfigCacheID" : {"default" : None, "type" : str,
                                           "optional" : False, "validate" : None,
                                           "attr" : "stepOneConfigCacheID", "null" : True},
                 "StepTwoConfigCacheID" : {"default" : None, "type" : str,
                                           "optional" : True, "validate" : None,
                                           "attr" : "stepTwoConfigCacheID", "null" : True},
                 "StepThreeConfigCacheID" : {"default" : None, "type" : str,
                                             "optional" : True, "validate" : None,
                                             "attr" : "stepThreeConfigCacheID", "null" : True},
                 "KeepStepOneOutput" : {"default" : True, "type" : strToBool,
                                        "optional" : True, "validate" : None,
                                        "attr" : "keepStepOneOutput", "null" : False},
                 "KeepStepTwoOutput" : {"default" : True, "type" : strToBool,
                                        "optional" : True, "validate" : None,
                                        "attr" : "keepStepTwoOutput", "null" : False},
                 "StepTwoTimePerEvent" : {"default" : 1, "type" : float,
                                          "optional" : True, "validate" : lambda x : x > 0,
                                          "attr" : "stepTwoTimePerEvent", "null" : False},
                 "StepThreeTimePerEvent" : {"default" : 1, "type" : float,
                                            "optional" : True, "validate" : lambda x : x > 0,
                                            "attr" : "stepThreeTimePerEvent", "null" : False},
                 "StepTwoSizePerEvent" : {"default" : None, "type" : float,
                                          "optional" : True, "validate" : lambda x : x > 0,
                                          "attr" : "stepTwoSizePerEvent", "null" : True},
                 "StepThreeSizePerEvent" : {"default" : None, "type" : float,
                                            "optional" : True, "validate" : lambda x : x > 0,
                                            "attr" : "stepThreeSizePerEvent", "null" : True},
                 "StepTwoMemory" : {"default" : None, "type" : float,
                                    "optional" : True, "validate" : lambda x : x > 0,
                                    "attr" : "stepTwoMemory", "null" : True},
                 "StepThreeMemory" : {"default" : None, "type" : float,
                                      "optional" : True, "validate" : lambda x : x > 0,
                                      "attr" : "stepThreeMemory", "null" : True},
                 "MCPileup" : {"default" : None, "type" : str,
                               "optional" : True, "validate" : dataset,
                               "attr" : "mcPileup", "null" : True},
                 "DataPileup" : {"default" : None, "type" : str,
                                 "optional" : True, "validate" : dataset,
                                 "attr" : "dataPileup", "null" : True},
                 "DeterministicPileup" : {"default" : False, "type" : strToBool,
                                          "optional" : True, "validate" : None,
                                          "attr" : "deterministicPileup", "null" : False}}
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #15
 def getWorkloadCreateArgs():
     baseArgs = DataProcessing.getWorkloadCreateArgs()
     specArgs = {"RequestType": {"default": "DQMHarvest", "optional": True},
                 "ConfigCacheID": {"optional": True, "null": True},
                 "DQMConfigCacheID": {"optional": False, "attr": "dqmConfigCacheID"},
                 "DQMUploadUrl": {"optional": False, "attr": "dqmUploadUrl"},
                }
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs
Example #16
    def getWorkloadCreateArgs():
        baseArgs = DataProcessing.getWorkloadCreateArgs()
        specArgs = {"RequestType" : {"default" : "ReReco", "optional" : False},
                    "TransientOutputModules" : {"default" : [], "type" : makeList,
                                                "attr" : "transientModules", "null" : False}
                    }
        baseArgs.update(specArgs)
        DataProcessing.setDefaultArgumentsProperty(baseArgs)
        return baseArgs
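The makeList type used for TransientOutputModules coerces the incoming value into a Python list. The stand-in below is a hypothetical sketch of that behaviour (WMCore's real makeList lives in WMSpec.WMWorkloadTools and may differ, for instance in its error handling), shown only to illustrate what the argument spec expects:

def makeListSketch(value):
    """Hypothetical stand-in for makeList: pass lists through, split a
    comma-separated string into a list of stripped tokens."""
    if isinstance(value, list):
        return value
    return [token.strip() for token in str(value).split(",") if token.strip()]

print(makeListSketch(["RECO", "AOD"]))  # ['RECO', 'AOD']
print(makeListSketch("RECO, AOD"))      # ['RECO', 'AOD']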
Example #17
    def validateSchema(self, schema):
        """
        _validateSchema_

        Standard DataProcessing schema validation.
        """
        DataProcessing.validateSchema(self, schema)

        self.validateConfigCacheExists(configID=schema["DQMConfigCacheID"],
                                       configCacheUrl=schema['ConfigCacheUrl'],
                                       couchDBName=schema["CouchDBName"],
                                       getOutputModules=False)
Example #18
    def validateSchema(self, schema):
        """
        _validateSchema_

        Standard DataProcessing schema validation.
        """
        DataProcessing.validateSchema(self, schema)

        self.validateConfigCacheExists(configID=schema["DQMConfigCacheID"],
                                       configCacheUrl=schema['ConfigCacheUrl'],
                                       couchDBName=schema["CouchDBName"],
                                       getOutputModules=False)
Example #19
    def validateSchema(self, schema):
        """
        _validateSchema_

        Check for required fields, and some skim facts
        """
        DataProcessing.validateSchema(self, schema)
        mainOutputModules = list(
            self.validateConfigCacheExists(
                configID=schema["ConfigCacheID"],
                configCacheUrl=schema['ConfigCacheUrl'],
                couchDBName=schema["CouchDBName"],
                getOutputModules=True))

        # Skim facts have to be validated outside the usual master validation
        skimSchema = {
            k: v
            for (k, v) in viewitems(schema) if k.startswith("Skim")
        }
        skimArguments = self.getSkimArguments()
        skimIndex = 1
        skimInputs = set()
        while "SkimName%s" % skimIndex in schema:
            instanceArguments = {}
            for argument in skimArguments:
                realArg = argument.replace("#N", str(skimIndex))
                instanceArguments[realArg] = skimArguments[argument]
            try:
                validateArgumentsCreate(skimSchema, instanceArguments)
            except Exception as ex:
                self.raiseValidationException(str(ex))

            self.validateConfigCacheExists(
                configID=schema["Skim%sConfigCacheID" % skimIndex],
                configCacheUrl=schema['ConfigCacheUrl'],
                couchDBName=schema["CouchDBName"],
                getOutputModules=False)
            if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
                error = "Processing config does not have the following output module: %s." % schema[
                    "SkimInput%s" % skimIndex]
                self.raiseValidationException(msg=error)
            skimInputs.add(schema["SkimInput%s" % skimIndex])
            skimIndex += 1

        # Validate that the transient output modules are used in a skim task
        if "TransientOutputModules" in schema:
            diffSet = set(schema["TransientOutputModules"]) - skimInputs
            if diffSet:
                self.raiseValidationException(
                    msg=
                    "A transient output module was specified but no skim was defined for it"
                )
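The while loop above builds per-skim validation rules by substituting the current skim index for the "#N" placeholder in each templated argument name. A minimal sketch of that substitution, using a made-up template entry (the real keys come from getSkimArguments(), which is not shown here):

# Hypothetical template; real entries come from getSkimArguments().
skimArguments = {"SkimName#N": {"optional": False, "type": str},
                 "Skim#NConfigCacheID": {"optional": False, "type": str}}

skimIndex = 1
instanceArguments = {}
for argument in skimArguments:
    realArg = argument.replace("#N", str(skimIndex))  # e.g. "SkimName1"
    instanceArguments[realArg] = skimArguments[argument]

print(sorted(instanceArguments))  # ['Skim1ConfigCacheID', 'SkimName1']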
Example #20
    def validateSchema(self, schema):
        """
        _validateSchema_

        Standard StdBase schema validation, plus verification
        of the ConfigCacheID
        """
        DataProcessing.validateSchema(self, schema)
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                       couchURL=couchUrl,
                                       couchDBName=schema["CouchDBName"])
        return
Example #21
    def validateSchema(self, schema):
        """
        _validateSchema_

        Standard StdBase schema validation, plus verification
        of the ConfigCacheID
        """
        DataProcessing.validateSchema(self, schema)
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                       couchURL=couchUrl,
                                       couchDBName=schema["CouchDBName"])
        return
Example #22
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
        # Adjust the pileup splitting
        self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

        return self.buildWorkload()
Example #23
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
        # Adjust the pileup splitting
        self.procJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

        return self.buildWorkload()
Example #24
    def __init__(self):
        """
        __init__

        Setup parameters that will be later overwritten in the call,
        otherwise pylint will complain about them.
        """
        DataProcessing.__init__(self)
        self.stepTwoMemory = None
        self.stepTwoSizePerEvent = None
        self.stepTwoTimePerEvent = None
        self.stepThreeMemory = None
        self.stepThreeSizePerEvent = None
        self.stepThreeTimePerEvent = None
Example #25
    def __init__(self):
        """
        __init__

        Setup parameters that will be later overwritten in the call,
        otherwise pylint will complain about them.
        """
        DataProcessing.__init__(self)
        self.stepTwoMemory = None
        self.stepTwoSizePerEvent = None
        self.stepTwoTimePerEvent = None
        self.stepThreeMemory = None
        self.stepThreeSizePerEvent = None
        self.stepThreeTimePerEvent = None
Example #26
    def validateSchema(self, schema):
        """
        _validateSchema_

        Standard StdBase schema validation, plus verification
        of the ConfigCacheID
        """
        DataProcessing.validateSchema(self, schema)
        self.validateConfigCacheExists(configID=schema["ConfigCacheID"],
                                       configCacheUrl=schema['ConfigCacheUrl'],
                                       couchDBName=schema["CouchDBName"],
                                       getOutputModules=False)

        return
Example #27
    def validateSchema(self, schema):
        """
        _validateSchema_

        Check for required fields, and some skim facts
        """
        DataProcessing.validateSchema(self, schema)
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        mainOutputModules = self.validateConfigCacheExists(
            configID=schema["ConfigCacheID"],
            couchURL=couchUrl,
            couchDBName=schema["CouchDBName"],
            getOutputModules=True).keys()

        # Skim facts have to be validated outside the usual master validation
        skimArguments = self.getSkimArguments()
        skimIndex = 1
        skimInputs = set()
        while "SkimName%s" % skimIndex in schema:
            instanceArguments = {}
            for argument in skimArguments.keys():
                realArg = argument.replace("#N", str(skimIndex))
                instanceArguments[realArg] = skimArguments[argument]
            msg = validateArgumentsCreate(schema, instanceArguments)
            if msg is not None:
                self.raiseValidationException(msg)

            self.validateConfigCacheExists(
                configID=schema["Skim%sConfigCacheID" % skimIndex],
                couchURL=couchUrl,
                couchDBName=schema["CouchDBName"])
            if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
                error = "Processing config does not have the following output module: %s." % schema[
                    "SkimInput%s" % skimIndex]
                self.raiseValidationException(msg=error)
            skimInputs.add(schema["SkimInput%s" % skimIndex])
            skimIndex += 1

        # Validate that the transient output modules are used in a skim task
        if "TransientOutputModules" in schema:
            diffSet = set(schema["TransientOutputModules"]) - skimInputs
            if diffSet:
                self.raiseValidationException(
                    msg=
                    "A transient output module was specified but no skim was defined for it"
                )
Example #28
 def getWorkloadArguments():
     baseArgs = DataProcessing.getWorkloadArguments()
     specArgs = {"PrimaryDataset" : {"default" : None, "type" : str,
                                     "optional" : True, "validate" : primdataset,
                                     "attr" : "inputPrimaryDataset", "null" : False},
                 "ConfigCacheID" : {"default" : None, "type" : str,
                                    "optional" : False, "validate" : None,
                                    "attr" : "configCacheID", "null" : False}}
     baseArgs.update(specArgs)
     return baseArgs
Example #29
    def validateSchema(self, schema):
        """
        _validateSchema_

        Check for required fields, and some skim facts
        """
        DataProcessing.validateSchema(self, schema)
        mainOutputModules = self.validateConfigCacheExists(configID = schema["ConfigCacheID"],
                                                           configCacheUrl = schema['ConfigCacheUrl'],
                                                           couchDBName = schema["CouchDBName"],
                                                           getOutputModules = True).keys()

        # Skim facts have to be validated outside the usual master validation
        skimSchema = {k: v for (k, v) in schema.iteritems() if k.startswith("Skim")}
        skimArguments = self.getSkimArguments()
        skimIndex = 1
        skimInputs = set()
        while "SkimName%s" % skimIndex in schema:
            instanceArguments = {}
            for argument in skimArguments:
                realArg = argument.replace("#N", str(skimIndex))
                instanceArguments[realArg] = skimArguments[argument]
            try:
                validateArgumentsCreate(skimSchema, instanceArguments)
            except Exception as ex:
                self.raiseValidationException(str(ex))

            self.validateConfigCacheExists(configID = schema["Skim%sConfigCacheID" % skimIndex],
                                           configCacheUrl = schema['ConfigCacheUrl'],
                                           couchDBName = schema["CouchDBName"],
                                           getOutputModules=False)
            if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
                error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
                self.raiseValidationException(msg = error)
            skimInputs.add(schema["SkimInput%s" % skimIndex])
            skimIndex += 1

        # Validate that the transient output modules are used in a skim task
        if "TransientOutputModules" in schema:
            diffSet = set(schema["TransientOutputModules"]) - skimInputs
            if diffSet:
                self.raiseValidationException(msg = "A transient output module was specified but no skim was defined for it")
Example #30
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitArgs = {}
        if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
                self.procJobSplitArgs["allowCreationFailure"] = False
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob

        self.skimJobSplitArgs = {}
        if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            if self.skimJobSplitAlgo == "EventAwareLumiBased":
                self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
                self.skimJobSplitArgs["allowCreationFailure"] = False
            self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        elif self.skimJobSplitAlgo == "LumiBased":
            self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.skimJobSplitAlgo == "FileBased":
            self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {
            "files_per_job": 1,
            "include_parents": True
        })

        return self.buildWorkload()
Example #31
    def getWorkloadArguments():
        baseArgs = DataProcessing.getWorkloadArguments()
        specArgs = {"TransientOutputModules" : {"default" : [], "type" : makeList,
                                                "optional" : True, "validate" : None,
                                                "attr" : "transientModules", "null" : False},
                    "ConfigCacheID" : {"default" : None, "type" : str,
                                       "optional" : False, "validate" : None,
                                       "attr" : "configCacheID", "null" : False}}
        baseArgs.update(specArgs)
        return baseArgs
Example #32
    def getWorkloadArguments():
        baseArgs = DataProcessing.getWorkloadArguments()
        specArgs = {"TransientOutputModules" : {"default" : [], "type" : makeList,
                                                "optional" : True, "validate" : None,
                                                "attr" : "transientModules", "null" : False},
                    "ConfigCacheID" : {"default" : None, "type" : str,
                                       "optional" : False, "validate" : None,
                                       "attr" : "configCacheID", "null" : False}}
        baseArgs.update(specArgs)
        return baseArgs
Example #33
    def validateSchema(self, schema):
        """
        _validateSchema_

        Check for required fields, and some skim facts
        """
        DataProcessing.validateSchema(self, schema)
        couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
        mainOutputModules = self.validateConfigCacheExists(configID = schema["ConfigCacheID"],
                                                           couchURL = couchUrl,
                                                           couchDBName = schema["CouchDBName"],
                                                           getOutputModules = True).keys()

        # Skim facts have to be validated outside the usual master validation
        skimArguments = self.getSkimArguments()
        skimIndex = 1
        skimInputs = set()
        while "SkimName%s" % skimIndex in schema:
            instanceArguments = {}
            for argument in skimArguments.keys():
                realArg = argument.replace("#N", str(skimIndex))
                instanceArguments[realArg] = skimArguments[argument]
            msg = validateArguments(schema, instanceArguments)
            if msg is not None:
                self.raiseValidationException(msg)

            self.validateConfigCacheExists(configID = schema["Skim%sConfigCacheID" % skimIndex],
                                           couchURL = couchUrl,
                                           couchDBName = schema["CouchDBName"])
            if schema["SkimInput%s" % skimIndex] not in mainOutputModules:
                error = "Processing config does not have the following output module: %s." % schema["SkimInput%s" % skimIndex]
                self.raiseValidationException(msg = error)
            skimInputs.add(schema["SkimInput%s" % skimIndex])
            skimIndex += 1

        # Validate that the transient output modules are used in a skim task
        if "TransientOutputModules" in schema:
            diffSet = set(schema["TransientOutputModules"]) - skimInputs
            if diffSet:
                self.raiseValidationException(msg = "A transient output module was specified but no skim was defined for it")
Example #34
 def getWorkloadArguments():
     baseArgs = DataProcessing.getWorkloadArguments()
     specArgs = {"StepOneOutputModuleName" : {"default" : None, "type" : str,
                                              "optional" : True, "validate" : None,
                                              "attr" : "stepOneOutputModuleName", "null" : False},
                 "StepTwoOutputModuleName" : {"default" : None, "type" : str,
                                              "optional" : True, "validate" : None,
                                              "attr" : "stepTwoOutputModuleName", "null" : False},
                 "StepOneConfigCacheID" : {"default" : None, "type" : str,
                                           "optional" : False, "validate" : None,
                                           "attr" : "stepOneConfigCacheID", "null" : False},
                 "StepTwoConfigCacheID" : {"default" : None, "type" : str,
                                           "optional" : True, "validate" : None,
                                           "attr" : "stepTwoConfigCacheID", "null" : False},
                 "StepThreeConfigCacheID" : {"default" : None, "type" : str,
                                             "optional" : True, "validate" : None,
                                             "attr" : "stepThreeConfigCacheID", "null" : False},
                 "KeepStepOneOutput" : {"default" : True, "type" : strToBool,
                                        "optional" : True, "validate" : None,
                                        "attr" : "keepStepOneOutput", "null" : False},
                 "KeepStepTwoOutput" : {"default" : True, "type" : strToBool,
                                        "optional" : True, "validate" : None,
                                        "attr" : "keepStepTwoOutput", "null" : False},
                 "StepTwoTimePerEvent" : {"default" : None, "type" : float,
                                          "optional" : True, "validate" : lambda x : x > 0,
                                          "attr" : "stepTwoTimePerEvent", "null" : False},
                 "StepThreeTimePerEvent" : {"default" : None, "type" : float,
                                            "optional" : True, "validate" : lambda x : x > 0,
                                            "attr" : "stepThreeTimePerEvent", "null" : False},
                 "StepTwoSizePerEvent" : {"default" : None, "type" : float,
                                          "optional" : True, "validate" : lambda x : x > 0,
                                          "attr" : "stepTwoSizePerEvent", "null" : False},
                 "StepThreeSizePerEvent" : {"default" : None, "type" : float,
                                            "optional" : True, "validate" : lambda x : x > 0,
                                            "attr" : "stepThreeSizePerEvent", "null" : False},
                 "StepTwoMemory" : {"default" : None, "type" : float,
                                    "optional" : True, "validate" : lambda x : x > 0,
                                    "attr" : "stepTwoMemory", "null" : False},
                 "StepThreeMemory" : {"default" : None, "type" : float,
                                      "optional" : True, "validate" : lambda x : x > 0,
                                      "attr" : "stepThreeMemory", "null" : False},
                 "MCPileup" : {"default" : None, "type" : str,
                               "optional" : True, "validate" : dataset,
                               "attr" : "mcPileup", "null" : False},
                 "DataPileup" : {"default" : None, "type" : str,
                                 "optional" : True, "validate" : dataset,
                                 "attr" : "dataPileup", "null" : False},
                 "DeterministicPileup" : {"default" : False, "type" : strToBool,
                                          "optional" : True, "validate" : None,
                                          "attr" : "deterministicPileup", "null" : False}}
     baseArgs.update(specArgs)
     return baseArgs
Example #35
    def getWorkloadCreateArgs():
        baseArgs = DataProcessing.getWorkloadCreateArgs()
        specArgs = {"RequestType": {"default": "PromptReco", "optional": True},
                    "ConfigCacheID": {"optional": True, "null": True},
                    "Scenario": {"default": None, "optional": False,
                                 "attr": "procScenario", "null": False},
                    "ProcessingString": {"default": "", "validate": procstringT0},
                    "WriteTiers": {"default": ["RECO", "AOD", "DQM", "ALCARECO"],
                                   "type": makeList, "optional": False, "null": False},
                    "AlcaSkims": {"default": ["TkAlCosmics0T", "MuAlGlobalCosmics", "HcalCalHOCosmics"],
                                  "type": makeList, "optional": False, "null": False},
                    "PhysicsSkims": {"default": [], "type": makeList,
                                     "optional": True, "null": False},
                    "InitCommand": {"default": None, "optional": True, "null": True},
                    "EnvPath": {"default": None, "optional": True, "null": True},
                    "BinPath": {"default": None, "optional": True, "null": True},
                    "DoLogCollect": {"default": True, "type": strToBool,
                                     "optional": True, "null": False},
                    "SplittingAlgo": {"default": "EventAwareLumiBased", "null": False,
                                      "validate": lambda x: x in ["EventBased", "LumiBased",
                                                                  "EventAwareLumiBased", "FileBased"],
                                      "attr": "procJobSplitAlgo"},
                    "EventsPerJob": {"default": 500, "type": int, "validate": lambda x: x > 0,
                                     "null": False},
                    "SkimSplittingAlgo": {"default": "FileBased", "null": False,
                                          "validate": lambda x: x in ["EventBased", "LumiBased",
                                                                      "EventAwareLumiBased", "FileBased"],
                                          "attr": "skimJobSplitAlgo"},
                    "SkimEventsPerJob": {"default": 500, "type": int, "validate": lambda x: x > 0,
                                         "null": False},
                    "SkimLumisPerJob": {"default": 8, "type": int, "validate": lambda x: x > 0,
                                        "null": False},
                    "SkimFilesPerJob": {"default": 1, "type": int, "validate": lambda x: x > 0,
                                        "null": False},
                    }

        baseArgs.update(specArgs)
        DataProcessing.setDefaultArgumentsProperty(baseArgs)
        return baseArgs
Example #36
    def getWorkloadCreateArgs():
        baseArgs = DataProcessing.getWorkloadCreateArgs()
        specArgs = {"RequestType": {"default": "PromptReco", "optional": True},
                    "ConfigCacheID": {"optional": True, "null": True},
                    "Scenario": {"default": None, "optional": False,
                                 "attr": "procScenario", "null": False},
                    "ProcessingString": {"default": "", "validate": procstringT0},
                    "WriteTiers": {"default": ["RECO", "AOD", "DQM", "ALCARECO"],
                                   "type": makeList, "optional": False, "null": False},
                    "AlcaSkims": {"default": ["TkAlCosmics0T", "MuAlGlobalCosmics", "HcalCalHOCosmics"],
                                  "type": makeList, "optional": False, "null": False},
                    "PhysicsSkims": {"default": [], "type": makeList,
                                     "optional": True, "null": False},
                    "InitCommand": {"default": None, "optional": True, "null": True},
                    "EnvPath": {"default": None, "optional": True, "null": True},
                    "BinPath": {"default": None, "optional": True, "null": True},
                    "DoLogCollect": {"default": True, "type": strToBool,
                                     "optional": True, "null": False},
                    "SplittingAlgo": {"default": "EventAwareLumiBased", "null": False,
                                      "validate": lambda x: x in ["EventBased", "LumiBased",
                                                                  "EventAwareLumiBased", "FileBased"],
                                      "attr": "procJobSplitAlgo"},
                    "EventsPerJob": {"default": 500, "type": int, "validate": lambda x: x > 0,
                                     "null": False},
                    "SkimSplittingAlgo": {"default": "FileBased", "null": False,
                                          "validate": lambda x: x in ["EventBased", "LumiBased",
                                                                      "EventAwareLumiBased", "FileBased"],
                                          "attr": "skimJobSplitAlgo"},
                    "SkimEventsPerJob": {"default": 500, "type": int, "validate": lambda x: x > 0,
                                         "null": False},
                    "SkimLumisPerJob": {"default": 8, "type": int, "validate": lambda x: x > 0,
                                        "null": False},
                    "SkimFilesPerJob": {"default": 1, "type": int, "validate": lambda x: x > 0,
                                        "null": False},
                    }

        baseArgs.update(specArgs)
        DataProcessing.setDefaultArgumentsProperty(baseArgs)
        return baseArgs
Example #37
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitArgs = {}
        if self.procJobSplitAlgo == "EventBased" or self.procJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["max_events_per_lumi"] = 100000
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = {}
        if self.skimJobSplitAlgo == "EventBased" or self.skimJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.skimJobSplitAlgo == "EventAwareLumiBased":
                self.skimJobSplitArgs["max_events_per_lumi"] = 20000
        elif self.skimJobSplitAlgo == "LumiBased":
            self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.skimJobSplitAlgo == "FileBased":
            self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {
            "files_per_job": 1,
            "include_parents": True
        })

        return self.buildWorkload()
Example #38
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        DataProcessing.__call__(self, workloadName, arguments)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitArgs = {}
        if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["job_time_limit"] = 96 * 3600  # 4 days in seconds
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob

        self.skimJobSplitArgs = {}
        if self.skimJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            if self.skimJobSplitAlgo == "EventAwareLumiBased":
                self.skimJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
            self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
        elif self.skimJobSplitAlgo == "LumiBased":
            self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.skimJobSplitAlgo == "FileBased":
            self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                              {"files_per_job": 1,
                                               "include_parents": True})

        return self.buildWorkload()
Example #39
 def getWorkloadCreateArgs():
     baseArgs = DataProcessing.getWorkloadCreateArgs()
     specArgs = {
         "RequestType": {
             "default": "DQMHarvest",
             "optional": True
         },
         "ConfigCacheID": {
             "optional": True,
             "null": True
         },
         "DQMConfigCacheID": {
             "optional": False,
             "attr": "dqmConfigCacheID"
         },
         "DQMUploadUrl": {
             "optional": False,
             "attr": "dqmUploadUrl"
         },
     }
     baseArgs.update(specArgs)
     DataProcessing.setDefaultArgumentsProperty(baseArgs)
     return baseArgs