Example #1
File: PromptReco.py, Project: emaszs/WMCore
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitArgs = {}
        if self.procJobSplitAlgo == "EventBased" or self.procJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["max_events_per_lumi"] = 100000
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = {}
        if self.skimJobSplitAlgo == "EventBased" or self.skimJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.skimJobSplitAlgo == "EventAwareLumiBased":
                self.skimJobSplitArgs["max_events_per_lumi"] = 20000
        elif self.skimJobSplitAlgo == "LumiBased":
            self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.skimJobSplitAlgo == "FileBased":
            self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {"files_per_job": 1, "include_parents": True})

        return self.buildWorkload()
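
Note that the final assignment above unconditionally replaces the skim splitting arguments built by the preceding if/elif chain with the request-supplied SkimJobSplitArgs (or the files_per_job default). If keeping the computed values is the intent, a guarded assignment would do it; this is a sketch, not the project's code:

    # Only honor an explicit override from the request; otherwise keep the
    # skim splitting arguments derived from the splitting algorithm above.
    if "SkimJobSplitArgs" in arguments:
        self.skimJobSplitArgs = arguments["SkimJobSplitArgs"]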
Example #2
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        self.procConfigCacheID = arguments.get("ProcConfigCacheID")

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo  = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs  = arguments.get("StdJobSplitArgs", {"lumis_per_job": 1})
        return self.buildWorkload()
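
All of these specs share the same calling convention: instantiate the factory, then call it with a workload name and the request-argument dictionary; StdBase.__call__ stores the common arguments before the spec-specific setup runs. A minimal driver sketch for the spec above (the factory class name and the argument values are illustrative assumptions, and the required arguments vary by WMCore version):

    # Hypothetical driver for the MonteCarloFromGEN spec shown above.
    arguments = {
        "InputDataset": "/SomePrimary/SomeProcessed-v1/GEN",  # illustrative
        "CMSSWVersion": "CMSSW_5_3_14",                       # illustrative
        "GlobalTag": "START53_V7A::All",                      # illustrative
        "CouchURL": "http://localhost:5984",
        "CouchDBName": "wmagent_configcache",
    }
    factory = MonteCarloFromGENWorkloadFactory()  # assumed class name
    workload = factory("MCFromGEN_test", arguments)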
Example #3
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DQMHarvest workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        self.workload = self.createWorkload()

        self.workload.setDashboardActivity("harvesting")
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        self.workload.setWorkQueueSplitPolicy("Dataset", "FileBased", {"files_per_job": 99999})

        # also creates the logCollect job by default
        self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                               periodic_harvest_interval=self.periodicHarvestInterval,
                               dqmHarvestUnit=self.dqmHarvestUnit)

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        self.workload.setTaskPropertiesFromWorkload()

        return self.workload
Example #4
File: TaskChain.py, Project: stuartw/WMCore
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a TaskChain workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()
        self.arguments = arguments
        self.couchURL = arguments['CouchURL']
        self.couchDBName = arguments['CouchDBName']
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments.get("GlobalTag", None)

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.emulation = arguments.get("Emulation", False)
        
        numTasks = arguments['TaskChain']
        for i in range(1, numTasks+1):
            #consistency check that there are numTasks defined in the request:
            if not arguments.has_key("Task%s" % i):
                msg = "Specified number of tasks: %s does not match defined task dictionary for Task%s" % (i, i)
                raise RuntimeError, msg
                
            taskConf = getTaskN(arguments, i)
            parent = parentTaskName(taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf.get("BlockBlacklist", [])
            self.blockWhitelist = taskConf.get("BlockWhitelist", [])
            self.runBlacklist   = taskConf.get("RunBlacklist", [])
            self.runWhitelist   = taskConf.get("RunWhitelist", [])

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]
                
            task = self.makeTask(taskConf, parentTask)
            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgorithm'], 
                                                          taskConf['SplittingArguments'])
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgorithm'],
                                                     taskConf['SplittingArguments'])
                    self.setupTask(task, taskConf)
                self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf
            
        return self.workload  
Example #5
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for an Analysis request

        """
        StdBase.__call__(self, workloadName, arguments)

        self.minMergeSize = 1

        if self.Lumis and self.analysisJobSplitAlgo not in ['LumiBased']:
            raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' %
                               'LumiBased')

        if self.analysisJobSplitAlgo == 'EventBased':
            self.analysisJobSplitArgs = {'events_per_job' : self.eventsPerJob}
        elif self.analysisJobSplitAlgo == 'LumiBased':
            self.analysisJobSplitArgs = {'lumis_per_job' : self.lumisPerJob}
            if self.Lumis:
                self.analysisJobSplitArgs.update({'lumis' : self.Lumis})
                self.analysisJobSplitArgs.update({'runs'  : self.Runs})
            self.analysisJobSplitArgs.update(
                           {'halt_job_on_file_boundaries' : False,
                            'splitOnRun' : False,
                           })

        return self.buildWorkload()
Example #6
File: Repack.py, Project: johnhcasallasl/T0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a Repack workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.outputs = arguments['Outputs']

        # job splitting parameters
        self.repackSplitArgs = {}
        self.repackSplitArgs['maxSizeSingleLumi'] = arguments['MaxSizeSingleLumi']
        self.repackSplitArgs['maxSizeMultiLumi'] = arguments['MaxSizeMultiLumi']
        self.repackSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.repackSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
        self.repackSplitArgs['maxLatency'] = arguments['MaxLatency']
        self.repackMergeSplitArgs = {}
        self.repackMergeSplitArgs['minInputSize'] = arguments['MinInputSize']
        self.repackMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
        self.repackMergeSplitArgs['maxEdmSize'] = arguments['MaxEdmSize']
        self.repackMergeSplitArgs['maxOverSize'] = arguments['MaxOverSize']
        self.repackMergeSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.repackMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
        self.repackMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

        return self.buildWorkload()
Example #7
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DQMHarvest workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        self.workload = self.createWorkload()

        self.workload.setDashboardActivity("harvesting")
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        splitArgs = {"runs_per_job": 1}
        if self.dqmHarvestUnit == "multiRun":
            # multiRun harvesting should collapse into a single job in the end,
            # hence the very high number of runs per job
            splitArgs['runs_per_job'] = 999999
        self.workload.setWorkQueueSplitPolicy("Dataset", "Harvest", splitArgs)

        # also creates the logCollect job by default
        self.addDQMHarvestTask(uploadProxy=self.dqmUploadProxy,
                               periodic_harvest_interval=self.periodicHarvestInterval,
                               dqmHarvestUnit=self.dqmHarvestUnit)

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        self.workload.setTaskPropertiesFromWorkload()

        return self.workload
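
Compare Example #3, which drives the same DQMHarvest setup through a "Dataset"/"FileBased" WorkQueue split with files_per_job set to 99999; this variant uses the dedicated "Harvest" policy instead, with runs_per_job raised to 999999 in multiRun mode so that a single job covers all runs.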
Example #8
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a TaskChain workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        for i in range(1, self.taskChain + 1):

            originalTaskConf = arguments["Task%d" % i]
            taskConf = {}
            # Make a shallow copy of the taskConf
            for k, v in originalTaskConf.items():
                taskConf[k] = v
            parent = taskConf.get("InputTask", None)

            self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf["BlockBlacklist"]
            self.blockWhitelist = taskConf["BlockWhitelist"]
            self.runBlacklist = taskConf["RunBlacklist"]
            self.runWhitelist = taskConf["RunWhitelist"]

            if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
                self.multicoreNCores = int(taskConf['Multicore'])

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

            task = self.makeTask(taskConf, parentTask)

            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                          taskConf['SplittingArguments'])
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                          taskConf['SplittingArguments'])
                    self.setupTask(task, taskConf)
                self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf

        self.workload.ignoreOutputModules(self.ignoredOutputModules)

        return self.workload
Example #9
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a StepChain workload with the given parameters.
        Configures the workload based on the first task information,
        then properly sets up the remaining tasks.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Update the task configuration
        taskConf = {}
        for k, v in arguments["Step1"].iteritems():
            taskConf[k] = v
        self.modifyTaskConfiguration(taskConf, True, 'InputDataset'
                                     not in taskConf)

        self.inputPrimaryDataset = self.getStepValue('PrimaryDataset',
                                                     taskConf,
                                                     self.primaryDataset)
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]
        self.splittingAlgo = taskConf['SplittingAlgo']

        # Create the first task
        firstTask = self.workload.newTask(taskConf['StepName'])

        # Create a proper task and set workload level arguments
        if isGenerator(arguments):
            self.workload.setDashboardActivity("production")
            self.workload.setWorkQueueSplitPolicy(
                "MonteCarlo", taskConf['SplittingAlgo'],
                taskConf['SplittingArguments'])
            self.workload.setEndPolicy("SingleShot")
            self.setupGeneratorTask(firstTask, taskConf)
        else:
            self.workload.setDashboardActivity("processing")
            self.workload.setWorkQueueSplitPolicy(
                "Block", taskConf['SplittingAlgo'],
                taskConf['SplittingArguments'])
            self.setupTask(firstTask, taskConf)

        # Now modify this task to add the next steps
        if self.stepChain > 1:
            self.setupNextSteps(firstTask, arguments)

        self.workload.setStepMapping(self.stepMapping)
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        # Feed values back to save in couch
        if self.eventsPerJob:
            arguments['Step1']['EventsPerJob'] = self.eventsPerJob
        if self.eventsPerLumi:
            arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
        return self.workload
Example #10
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a TaskChain workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        for i in range(1, self.taskChain + 1):

            originalTaskConf = arguments["Task%d" % i]
            taskConf = {}
            # Make a shallow copy of the taskConf
            for k,v in originalTaskConf.items():
                taskConf[k] = v
            parent = taskConf.get("InputTask", None)

            self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf["BlockBlacklist"]
            self.blockWhitelist = taskConf["BlockWhitelist"]
            self.runBlacklist   = taskConf["RunBlacklist"]
            self.runWhitelist   = taskConf["RunWhitelist"]
            
            if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
                self.multicoreNCores = int(taskConf['Multicore'])

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]
                
            task = self.makeTask(taskConf, parentTask)

            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                          taskConf['SplittingArguments'])
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                     taskConf['SplittingArguments'])
                    self.setupTask(task, taskConf)
                self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf

        self.workload.ignoreOutputModules(self.ignoredOutputModules)

        return self.workload
Example #11
File: PromptReco.py, Project: cinquo/WMCore
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a PromptReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.frameworkVersion = arguments['CMSSWVersion']
        self.globalTag = arguments['GlobalTag']
        self.writeTiers = arguments['WriteTiers']
        self.alcaSkims = arguments['AlcaSkims']
        self.inputDataset = arguments['InputDataset']
        self.promptSkims = arguments['PromptSkims']
        self.couchURL = arguments['CouchURL']
        self.couchDBName = arguments['CouchDBName']
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
        self.initCommand = arguments['InitCommand']

        #Optional parameters
        self.envPath = arguments.get('EnvPath', None)
        self.binPath = arguments.get('BinPath', None)

        if arguments.has_key('Multicore'):
            numCores = arguments.get('Multicore')
            if numCores == None or numCores == "":
                self.multicore = False
            elif numCores == "auto":
                self.multicore = True
                self.multicoreNCores = "auto"
            else:
                self.multicore = True
                self.multicoreNCores = numCores

        # Do we run log collect ? (Tier0 does not support it yet)
        self.doLogCollect = arguments.get("DoLogCollect", True)

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "EventBased")
        self.procJobSplitArgs = arguments.get("StdJobSplitArgs",
                                              {"events_per_job": 500})
        self.skimJobSplitAlgo = arguments.get("SkimJobSplitAlgo", "FileBased")
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs",
                                              {"files_per_job": 1,
                                               "include_parents": True})

        return self.buildWorkload()
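
The Multicore-parsing block above reappears verbatim in Examples #12, #17, #18 and #29. A small helper makes its three-way outcome explicit; this is a consolidation sketch, not WMCore code (note the originals leave multicoreNCores unset when multicore is disabled):

    def parseMulticore(numCores):
        """Return (multicore, nCores) for a request's Multicore value.

        None or "" disables multicore, "auto" enables it with automatic
        core detection, and anything else is an explicit core count.
        """
        if numCores is None or numCores == "":
            return False, None
        if numCores == "auto":
            return True, "auto"
        return True, numCores

    # e.g.: self.multicore, self.multicoreNCores = parseMulticore(arguments.get("Multicore"))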
Example #12
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DataProcessing workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]        

        # One of these parameters must be set.
        if arguments.has_key("ProdConfigCacheID"):
            self.procConfigCacheID = arguments["ProdConfigCacheID"]
        else:
            self.procConfigCacheID = arguments.get("ProcConfigCacheID", None)

        if arguments.has_key("Scenario"):
            self.procScenario = arguments.get("Scenario", None)
        else:
            self.procScenario = arguments.get("ProcScenario", None)

        if arguments.has_key("Multicore"):
            numCores = arguments.get("Multicore")
            if numCores == None or numCores == "":
                self.multicore = False
            elif numCores == "auto":
                self.multicore = True
                self.multicoreNCores = "auto"
            else:
                self.multicore = True
                self.multicoreNCores = numCores

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo  = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs  = arguments.get("StdJobSplitArgs",
                                               {"lumis_per_job": 8,
                                                "include_parents": self.includeParents})
        return self.buildWorkload()
Example #13
File: MonteCarlo.py, Project: cinquo/WMCore
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for a MonteCarlo request

        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputPrimaryDataset = arguments["PrimaryDataset"]
        self.frameworkVersion    = arguments["CMSSWVersion"]
        self.globalTag           = arguments["GlobalTag"]
        self.seeding             = arguments.get("Seeding", "AutomaticSeeding")
        self.configCacheID   = arguments["ConfigCacheID"]

        # Splitting arguments
        timePerEvent     = int(arguments.get("TimePerEvent", 60))
        filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
        totalTime        = int(arguments.get("TotalTime", 9 * 3600))
        self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)
        self.firstEvent  = int(arguments.get("FirstEvent", 1))
        self.firstLumi   = int(arguments.get("FirstLumi", 1))
        # We don't write out every event in MC, adjust the size per event accordingly
        self.sizePerEvent = self.sizePerEvent * filterEfficiency

        # pileup configuration for the first generation task
        self.pileupConfig = arguments.get("PileupConfig", None)

        #Events per lumi configuration (Allow others to inherit)
        self.eventsPerLumi = arguments.get("EventsPerLumi", None)
        if self.eventsPerLumi != None:
            self.eventsPerLumi = int(self.eventsPerLumi)

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
        
        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        eventsPerJob = int(totalTime/timePerEvent/filterEfficiency)
        self.prodJobSplitAlgo  = arguments.get("ProdJobSplitAlgo", "EventBased")
        self.prodJobSplitArgs  = arguments.get("ProdJobSplitArgs",
                                               {"events_per_job": eventsPerJob})
        self.previousJobCount  = 0
        if self.firstEvent > 1 or self.firstLumi > 1:
            self.previousJobCount = int(math.ceil(self.firstEvent/float(self.prodJobSplitArgs["events_per_job"])))
            self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount
        
        return self.buildWorkload()
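
A worked example of the splitting defaults above, with illustrative numbers:

    import math

    # Defaults from the example: 60 s/event, no filtering, 9-hour jobs.
    timePerEvent, filterEfficiency, totalTime = 60, 1.0, 9 * 3600
    eventsPerJob = int(totalTime / timePerEvent / filterEfficiency)      # 540
    # Extending statistics from firstEvent = 1081 (events 1-1080 produced):
    firstEvent = 1081
    previousJobCount = int(math.ceil(firstEvent / float(eventsPerJob)))  # 3
    # previousJobCount then seeds initial_lfn_counter so new LFNs do not
    # collide with those of the earlier jobs.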
Example #14
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a StepChain workload with the given parameters.
        Configures the workload based on the first task information,
        then properly sets up the remaining tasks.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Update the task configuration
        taskConf = {}
        for k, v in arguments["Step1"].iteritems():
            taskConf[k] = v
        self.modifyTaskConfiguration(taskConf, True, 'InputDataset'
                                     not in taskConf)
        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])
        self.inputPrimaryDataset = taskConf.get("PrimaryDataset",
                                                self.primaryDataset)
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]
        self.splittingAlgo = taskConf['SplittingAlgo']

        # Create the first task
        firstTask = self.workload.newTask(taskConf['StepName'])

        # Create a proper task and set workload level arguments
        if isGenerator(arguments):
            self.workload.setDashboardActivity("production")
            self.workload.setWorkQueueSplitPolicy(
                "MonteCarlo", taskConf['SplittingAlgo'],
                taskConf['SplittingArguments'])
            self.workload.setEndPolicy("SingleShot")
            self.setupGeneratorTask(firstTask, taskConf)
        else:
            self.workload.setDashboardActivity("processing")
            self.workload.setWorkQueueSplitPolicy(
                "Block", taskConf['SplittingAlgo'],
                taskConf['SplittingArguments'])
            self.setupTask(firstTask, taskConf)
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        # Now modify this task to add the next steps
        if self.stepChain > 1:
            self.setupNextSteps(firstTask, arguments)

        # All tasks need to have this parameter set
        self.workload.setTaskPropertiesFromWorkload()

        return self.workload
Example #15
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a StepChain workload with the given parameters.
        Configures the workload based on the first task information,
        then properly sets up the remaining tasks.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Update the task configuration
        taskConf = {}
        for k, v in arguments["Step1"].iteritems():
            taskConf[k] = v
        self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)

        self.inputPrimaryDataset = self.getStepValue('PrimaryDataset', taskConf, self.primaryDataset)
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]
        self.splittingAlgo = taskConf['SplittingAlgo']

        # Create the first task
        firstTask = self.workload.newTask(taskConf['StepName'])

        # Create a proper task and set workload level arguments
        if isGenerator(arguments):
            self.workload.setDashboardActivity("production")
            self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                  taskConf['SplittingArguments'])
            self.workload.setEndPolicy("SingleShot")
            self.setupGeneratorTask(firstTask, taskConf)
        else:
            self.workload.setDashboardActivity("processing")
            self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                  taskConf['SplittingArguments'])
            self.setupTask(firstTask, taskConf)

        # Now modify this task to add the next steps
        if self.stepChain > 1:
            self.setupNextSteps(firstTask, arguments)

        self.workload.setStepMapping(self.stepMapping)
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        # Feed values back to save in couch
        if self.eventsPerJob:
            arguments['Step1']['EventsPerJob'] = self.eventsPerJob
        if self.eventsPerLumi:
            arguments['Step1']['EventsPerLumi'] = self.eventsPerLumi
        return self.workload
Example #16
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for a MonteCarlo request

        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputPrimaryDataset = arguments["PrimaryDataset"]
        self.frameworkVersion    = arguments["CMSSWVersion"]
        self.globalTag           = arguments["GlobalTag"]
        self.seeding             = arguments.get("Seeding", "AutomaticSeeding")
        self.configCacheID   = arguments["ConfigCacheID"]

        # Splitting arguments
        timePerEvent     = int(arguments.get("TimePerEvent", 60))
        filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
        totalTime        = int(arguments.get("TotalTime", 9 * 3600))
        self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)
        self.firstEvent  = int(arguments.get("FirstEvent", 1))
        self.firstLumi   = int(arguments.get("FirstLumi", 1))

        # pileup configuration for the first generation task
        self.pileupConfig = arguments.get("PileupConfig", None)

        #Events per lumi configuration (Allow others to inherit)
        self.eventsPerLumi = arguments.get("EventsPerLumi", None)
        if self.eventsPerLumi != None:
            self.eventsPerLumi = int(self.eventsPerLumi)

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
        
        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        eventsPerJob = int(totalTime/timePerEvent/filterEfficiency)
        self.prodJobSplitAlgo  = arguments.get("ProdJobSplitAlgo", "EventBased")
        self.prodJobSplitArgs  = arguments.get("ProdJobSplitArgs",
                                               {"events_per_job": eventsPerJob})
        self.previousJobCount  = 0
        if self.firstEvent > 1 or self.firstLumi > 1:
            self.previousJobCount = int(math.ceil(self.firstEvent/float(self.prodJobSplitArgs["events_per_job"])))
            self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount
        
        return self.buildWorkload()
Example #17
File: Express.py, Project: dballesteros7/T0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create an Express workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]
        self.globalTagTransaction = arguments["GlobalTagTransaction"]
        self.procScenario = arguments['ProcScenario']
        self.alcaSkims = arguments['AlcaSkims']
        self.dqmSequences = arguments['DqmSequences']
        self.outputs = arguments['Outputs']
        self.dqmUploadProxy = arguments['DQMUploadProxy']
        self.alcaHarvestTimeout = arguments['AlcaHarvestTimeout']
        self.alcaHarvestDir = arguments['AlcaHarvestDir']
        self.streamName = arguments['StreamName']

        # job splitting parameters (also required parameters)
        self.expressSplitArgs = {}
        self.expressSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.expressMergeSplitArgs = {}
        self.expressMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
        self.expressMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
        self.expressMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

        if arguments.has_key("Multicore"):
            numCores = arguments.get("Multicore")
            if numCores == None or numCores == "":
                self.multicore = False
            elif numCores == "auto":
                self.multicore = True
                self.multicoreNCores = "auto"
            else:
                self.multicore = True
                self.multicoreNCores = numCores

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # fixed parameters that are used in various places
        self.alcaHarvestOutLabel = "Sqlite"

        return self.buildWorkload()
Example #18
File: Repack.py, Project: dballesteros7/T0
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a Repack workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.procScenario = arguments['ProcScenario']
        self.outputs = arguments['Outputs']

        # crashes if this isn't set
        self.globalTag = "NOTSET"

        # job splitting parameters
        self.repackSplitArgs = {}
        self.repackSplitArgs['maxSizeSingleLumi'] = arguments['MaxSizeSingleLumi']
        self.repackSplitArgs['maxSizeMultiLumi'] = arguments['MaxSizeMultiLumi']
        self.repackSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.repackSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
        self.repackMergeSplitArgs = {}
        self.repackMergeSplitArgs['minInputSize'] = arguments['MinInputSize']
        self.repackMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
        self.repackMergeSplitArgs['maxEdmSize'] = arguments['MaxEdmSize']
        self.repackMergeSplitArgs['maxOverSize'] = arguments['MaxOverSize']
        self.repackMergeSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.repackMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']

        if arguments.has_key("Multicore"):
            numCores = arguments.get("Multicore")
            if numCores == None or numCores == "":
                self.multicore = False
            elif numCores == "auto":
                self.multicore = True
                self.multicoreNCores = "auto"
            else:
                self.multicore = True
                self.multicoreNCores = numCores

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        return self.buildWorkload()
Example #19
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a StepChain workload with the given parameters.
        Configures the workload based on the first task information,
        then properly sets up the remaining tasks.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Update the task configuration
        taskConf = {}
        for k, v in arguments["Step1"].iteritems():
            taskConf[k] = v
        self.modifyTaskConfiguration(taskConf, True, 'InputDataset' not in taskConf)
        if taskConf['Multicore'] and taskConf['Multicore'] != 'None':
            self.multicoreNCores = int(taskConf['Multicore'])
        self.inputPrimaryDataset = taskConf.get("PrimaryDataset", self.primaryDataset)
        self.blockBlacklist = taskConf["BlockBlacklist"]
        self.blockWhitelist = taskConf["BlockWhitelist"]
        self.runBlacklist = taskConf["RunBlacklist"]
        self.runWhitelist = taskConf["RunWhitelist"]
        self.splittingAlgo = taskConf['SplittingAlgo']

        # Create the first task
        firstTask = self.workload.newTask(taskConf['StepName'])

        # Create a proper task and set workload level arguments
        if isGenerator(arguments):
            self.workload.setDashboardActivity("production")
            self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                  taskConf['SplittingArguments'])
            self.workload.setEndPolicy("SingleShot")
            self.setupGeneratorTask(firstTask, taskConf)
        else:
            self.workload.setDashboardActivity("processing")
            self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                  taskConf['SplittingArguments'])
            self.setupTask(firstTask, taskConf)
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())

        # Now modify this task to add the next steps
        if self.stepChain > 1:
            self.setupNextSteps(firstTask, arguments)

        # All tasks need to have this parameter set
        self.workload.setTaskPropertiesFromWorkload()

        return self.workload
Example #20
File: ReDigi.py, Project: ticoann/WMCore
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReDigi workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Pull down the configs and the names of the output modules so that
        # we can chain things together properly.
        self.stepOneOutputModuleName = arguments.get("StepOneOutputModuleName", None)
        self.stepTwoOutputModuleName = arguments.get("StepTwoOutputModuleName")
        self.stepOneConfigCacheID = arguments.get("StepOneConfigCacheID")
        self.stepTwoConfigCacheID = arguments.get("StepTwoConfigCacheID", None)
        self.stepThreeConfigCacheID = arguments.get("StepThreeConfigCacheID")
        self.keepStepOneOutput = arguments.get("KeepStepOneOutput", True)
        self.keepStepTwoOutput = arguments.get("KeepStepTwoOutput", True)

        # Pileup configuration for the first generation task
        self.pileupConfig = arguments.get("PileupConfig", None)

        # Optional arguments that default to something reasonable.
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs = arguments.get("StdJobSplitArgs",
                                              {"lumis_per_job": 8,
                                               "include_parents": self.includeParents})
        return self.buildWorkload()
Example #21
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)

        # Handle the default of the various splitting algorithms
        self.procJobSplitArgs = {"include_parents": self.includeParents}
        if self.procJobSplitAlgo == "EventBased" or self.procJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["max_events_per_lumi"] = 20000
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob

        return
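
The 8.0 * 3600.0 constant above targets the CMS-standard eight-hour job (see the comment in Example #24). For instance, with an estimated 20 seconds per event:

    eventsPerJob = int((8.0 * 3600.0) / 20.0)  # 28800 s / 20 s per event -> 1440 events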
Example #22
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)

        # Handle the default of the various splitting algorithms
        self.procJobSplitArgs = {"include_parents" : self.includeParents}
        if self.procJobSplitAlgo == "EventBased" or self.procJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["max_events_per_lumi"] = 20000
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob

        return
Example #23
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DataProcessing workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        # Get the ConfigCacheID
        self.configCacheID = arguments.get("ConfigCacheID", None)
        # or alternatively CouchURL part can be replaced by ConfigCacheUrl,
        # then ConfigCacheUrl + CouchDBName + ConfigCacheID
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Optional output modules that will not be merged but may be used by subsequent steps
        self.transientModules = arguments.get("TransientOutputModules", [])

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get(
            "DbsUrl",
            "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs = arguments.get(
            "StdJobSplitArgs", {
                "lumis_per_job": 8,
                "include_parents": self.includeParents
            })
        return self.buildWorkload()
Example #24
    def __call__(self, workloadName, arguments):
        """
        Store the arguments in attributes with the proper
        formatting.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Adjust the events by the filter efficiency
        self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

        # We don't write out every event in MC,
        # adjust the size per event accordingly
        self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

        # Tune the splitting, only EventBased is allowed for MonteCarlo
        # 8h jobs are CMS standard, set the default with that in mind
        self.prodJobSplitAlgo = "EventBased"
        self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(
            self.eventsPerJob, self.eventsPerLumi, self.timePerEvent)

        self.prodJobSplitArgs = {
            "events_per_job": self.eventsPerJob,
            "events_per_lumi": self.eventsPerLumi,
            "lheInputFiles": self.lheInputFiles
        }

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
        # Adjust the pileup splitting
        self.prodJobSplitArgs.setdefault("deterministicPileup",
                                         self.deterministicPileup)

        # Production can be extending statistics,
        # need to move the initial lfn counter
        self.previousJobCount = 0
        if self.firstLumi > 1:
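            # NOTE: with Python 2 integers, (firstEvent - 1) / eventsPerJob floors
            # before math.ceil runs; compare the explicit float(...) in Example #13.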
            self.previousJobCount = int(
                math.ceil((self.firstEvent - 1) / self.eventsPerJob))
            self.prodJobSplitArgs[
                "initial_lfn_counter"] = self.previousJobCount

        # Feed values back to save in couch
        arguments['EventsPerJob'] = self.eventsPerJob

        return self.buildWorkload()
Example #25
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a DataProcessing workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]

        # DataProcessing is split by block and can keep accepting new blocks for a configurable delay after the first split
        self.openRunningTimeout = int(arguments.get("OpenRunningTimeout", 0))

        # Get the ConfigCacheID
        self.configCacheID = arguments.get("ConfigCacheID", None)
        # or alternatively CouchURL part can be replaced by ConfigCacheUrl,
        # then ConfigCacheUrl + CouchDBName + ConfigCacheID
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Optional output modules that will not be merged but may be used by subsequent steps
        self.transientModules = arguments.get("TransientOutputModules", [])

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo  = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs  = arguments.get("StdJobSplitArgs",
                                               {"lumis_per_job": 8,
                                                "include_parents": self.includeParents})
        return self.buildWorkload()
Example #26
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)

        # Handle the default of the various splitting algorithms
        self.procJobSplitArgs = {"include_parents": self.includeParents}
        if self.procJobSplitAlgo in ["EventBased", "EventAwareLumiBased"]:
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["job_time_limit"] = 48 * 3600  # 2 days
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            arguments['EventsPerJob'] = self.eventsPerJob
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob

        return
Example #27
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitArgs = {}
        if self.procJobSplitAlgo == "EventBased" or self.procJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.procJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.procJobSplitAlgo == "EventAwareLumiBased":
                self.procJobSplitArgs["max_events_per_lumi"] = 100000
                self.procJobSplitArgs["capJobTime"] = 47 * 3600
                if self.multicore:
                    self.procJobSplitArgs["capJobDisk"] = max(
                        self.multicoreNCores * 20000000, 80000000)
                else:
                    self.procJobSplitArgs["capJobDisk"] = 80000000
        elif self.procJobSplitAlgo == "LumiBased":
            self.procJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.procJobSplitAlgo == "FileBased":
            self.procJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = {}
        if self.skimJobSplitAlgo == "EventBased" or self.skimJobSplitAlgo == "EventAwareLumiBased":
            if self.eventsPerJob is None:
                self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
            self.skimJobSplitArgs["events_per_job"] = self.eventsPerJob
            if self.skimJobSplitAlgo == "EventAwareLumiBased":
                self.skimJobSplitArgs["max_events_per_lumi"] = 20000
        elif self.skimJobSplitAlgo == "LumiBased":
            self.skimJobSplitArgs["lumis_per_job"] = self.lumisPerJob
        elif self.skimJobSplitAlgo == "FileBased":
            self.skimJobSplitArgs["files_per_job"] = self.filesPerJob
        self.skimJobSplitArgs = arguments.get("SkimJobSplitArgs", {
            "files_per_job": 1,
            "include_parents": True
        })

        return self.buildWorkload()
Example #28
    def __call__(self, workloadName, arguments):
        """
        Store the arguments in attributes with the proper
        formatting.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Adjust the events by the filter efficiency
        self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

        # We don't write out every event in MC,
        # adjust the size per event accordingly
        self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

        # Tune the splitting, only EventBased is allowed for MonteCarlo
        # 8h jobs are CMS standard, set the default with that in mind
        self.prodJobSplitAlgo = "EventBased"
        self.eventsPerJob, self.eventsPerLumi = StdBase.calcEvtsPerJobLumi(self.eventsPerJob,
                                                                           self.eventsPerLumi,
                                                                           self.timePerEvent)

        self.prodJobSplitArgs = {"events_per_job": self.eventsPerJob,
                                 "events_per_lumi": self.eventsPerLumi,
                                 "lheInputFiles": self.lheInputFiles}

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)
        # Adjust the pileup splitting
        self.prodJobSplitArgs.setdefault("deterministicPileup", self.deterministicPileup)

        # Production can be extending statistics,
        # need to move the initial lfn counter
        self.previousJobCount = 0
        if self.firstLumi > 1:
            self.previousJobCount = int(math.ceil(float(self.firstEvent - 1) / self.eventsPerJob))
            self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

        # Feed values back to save in couch
        arguments['EventsPerJob'] = self.eventsPerJob

        return self.buildWorkload()
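
StdBase.calcEvtsPerJobLumi itself is not shown in this listing, but Example #33 below inlines the same defaults (eight-hour jobs, events_per_lumi falling back to events_per_job). A plausible stand-in built on those defaults — an assumption, not WMCore's actual implementation — is:

def calc_evts_per_job_lumi(events_per_job, events_per_lumi, time_per_event):
    # Hypothetical sketch of the helper, mirroring the inline defaults
    # in Example #33.
    if events_per_job is None:
        events_per_job = int((8.0 * 3600.0) / time_per_event)
    if events_per_lumi is None:
        events_per_lumi = events_per_job
    # Clamp so a lumi section never spans more than one job.
    events_per_lumi = min(events_per_lumi, events_per_job)
    return events_per_job, events_per_lumi

print(calc_evts_per_job_lumi(None, None, 30.0))  # (960, 960)
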
Example #29
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.frameworkVersion = arguments['CMSSWVersion']
        self.globalTag = arguments['GlobalTag']
        self.procScenario = arguments['ProcScenario']
        self.writeTiers = arguments['WriteTiers']
        self.alcaSkims = arguments['AlcaSkims']
        self.inputDataset = arguments['InputDataset']

        if 'Multicore' in arguments:
            numCores = arguments.get('Multicore')
            if numCores is None or numCores == "":
                self.multicore = False
            elif numCores == "auto":
                self.multicore = True
                self.multicoreNCores = "auto"
            else:
                self.multicore = True
                self.multicoreNCores = numCores

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo  = arguments.get("StdJobSplitAlgo", "FileBased")
        self.procJobSplitArgs  = arguments.get("StdJobSplitArgs", {})

        return self.buildWorkload()
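
The Multicore handling above collapses to three cases; mirroring it in a small helper makes the mapping easy to verify:

def parse_multicore(value):
    # Mirror of the Multicore branch above, for illustration only.
    if value is None or value == "":
        return False, None      # multicore disabled
    if value == "auto":
        return True, "auto"    # let the agent pick the core count
    return True, value         # explicit core count

for v in (None, "", "auto", 4):
    print(v, parse_multicore(v))
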
Example #30
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for a MonteCarlo request
        
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputPrimaryDataset = arguments["PrimaryDataset"]
        self.frameworkVersion    = arguments["CMSSWVersion"]
        self.globalTag           = arguments["GlobalTag"]
        self.seeding             = arguments.get("Seeding", "AutomaticSeeding")
        self.prodConfigCacheID   = arguments["ProcConfigCacheID"]

        # Splitting arguments
        timePerEvent     = int(arguments.get("TimePerEvent", 60))
        filterEfficiency = float(arguments.get("FilterEfficiency", 1.0))
        totalTime        = int(arguments.get("TotalTime", 9 * 3600))
        self.totalEvents = int(int(arguments["RequestNumEvents"]) / filterEfficiency)

        # pileup configuration for the first generation task
        self.pileupConfig = arguments.get("PileupConfig", None)

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]        

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.emulation = arguments.get("Emulation", False)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        eventsPerJob = int(totalTime/timePerEvent/filterEfficiency)
        self.prodJobSplitAlgo  = arguments.get("ProdJobSplitAlgo", "EventBased")
        self.prodJobSplitArgs  = arguments.get("ProdJobSplitArgs",
                                               {"events_per_job": eventsPerJob})
        
        return self.buildWorkload()
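
With the defaults in this snippet (TotalTime of nine hours, 60 s/event, filter efficiency 1.0), the placeholder splitting works out to 540 events per job:

total_time = 9 * 3600        # seconds, default TotalTime
time_per_event = 60          # seconds, default TimePerEvent
filter_efficiency = 1.0      # default FilterEfficiency
print(int(total_time / time_per_event / filter_efficiency))  # 540
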
Example #31
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a RelValMC workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # Required parameters relevant to the MC generation.
        self.genConfigCacheID = arguments["GenConfigCacheID"]
        self.inputPrimaryDataset = arguments["PrimaryDataset"]
        self.totalEvents = arguments["RequestNumEvents"]
        self.seeding = arguments.get("Seeding", "AutomaticSeeding")
        self.pileupConfig = arguments.get("PileupConfig", None)

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Generation step parameters
        self.genJobSplitAlgo = arguments.get("GenJobSplitAlgo", "EventBased")
        self.genJobSplitArgs = arguments.get("GenJobSplitArgs",
                                             {"events_per_job": 1000})

        # Processing step parameters
        self.procJobSplitAlgo = arguments.get("ProcJobSplitAlgo", "FileBased")
        self.procJobSplitArgs = arguments.get("ProcJobSplitArgs",
                                              {"files_per_job": 1})

        self.genOutputModuleName = arguments.get("GenOutputModuleName", None)
        self.stepOneOutputModuleName = arguments.get("StepOneOutputModuleName",
                                                     None)
        self.stepOneConfigCacheID = arguments["StepOneConfigCacheID"]
        self.stepTwoConfigCacheID = arguments["StepTwoConfigCacheID"]
        return self.buildWorkload()
Example #32
File: RelValMC.py Project: cinquo/WMCore
    def __call__(self, workloadName, arguments):
        """
        __call__

        Create a RelValMC workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # Required parameters relevant to the MC generation.
        self.genConfigCacheID = arguments["GenConfigCacheID"]
        self.inputPrimaryDataset = arguments["PrimaryDataset"]
        self.totalEvents = arguments["RequestNumEvents"]
        self.seeding = arguments.get("Seeding", "AutomaticSeeding")
        self.pileupConfig = arguments.get("PileupConfig", None)

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Generation step parameters
        self.genJobSplitAlgo = arguments.get("GenJobSplitAlgo", "EventBased")
        self.genJobSplitArgs = arguments.get("GenJobSplitArgs",
                                             {"events_per_job": 1000})

        # Processing step parameters
        self.procJobSplitAlgo = arguments.get("ProcJobSplitAlgo", "FileBased")
        self.procJobSplitArgs = arguments.get("ProcJobSplitArgs",
                                              {"files_per_job": 1})

        self.genOutputModuleName = arguments.get("GenOutputModuleName", None)
        self.stepOneOutputModuleName = arguments.get("StepOneOutputModuleName", None)
        self.stepOneConfigCacheID = arguments["StepOneConfigCacheID"]
        self.stepTwoConfigCacheID = arguments["StepTwoConfigCacheID"]
        return self.buildWorkload()
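
For reference, a hypothetical arguments dictionary covering the keys this RelValMC factory requires (all values are placeholders, not taken from the source):

relvalmc_args = {
    "CMSSWVersion": "CMSSW_X_Y_Z",        # placeholder release
    "GlobalTag": "GT::All",               # placeholder global tag
    "PrimaryDataset": "RelValExample",
    "RequestNumEvents": 10000,
    "GenConfigCacheID": "aaa111",         # hypothetical ConfigCache ids
    "StepOneConfigCacheID": "bbb222",
    "StepTwoConfigCacheID": "ccc333",
    "CouchURL": "http://localhost:5984",
    "CouchDBName": "wmagent_configcache",
}
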
Example #33
    def __call__(self, workloadName, arguments):
        """
        Store the arguments in attributes with the proper
        formatting.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Adjust the events by the filter efficiency
        self.totalEvents = int(self.requestNumEvents / self.filterEfficiency)

        # We don't write out every event in MC,
        # adjust the size per event accordingly
        self.sizePerEvent = self.sizePerEvent * self.filterEfficiency

        # Tune the splitting, only EventBased is allowed for MonteCarlo
        # 8h jobs are CMS standard, set the default with that in mind
        self.prodJobSplitAlgo = "EventBased"
        if self.eventsPerJob is None:
            self.eventsPerJob = int((8.0 * 3600.0) / self.timePerEvent)
        if self.eventsPerLumi is None:
            self.eventsPerLumi = self.eventsPerJob
        self.prodJobSplitArgs = {
            "events_per_job": self.eventsPerJob,
            "events_per_lumi": self.eventsPerLumi,
            "lheInputFiles": self.lheInputFiles,
        }

        # Transform the pileup as required by the CMSSW step
        self.pileupConfig = parsePileupConfig(self.mcPileup, self.dataPileup)

        # Production can be extending statistics,
        # need to move the initial lfn counter
        self.previousJobCount = 0
        if self.firstLumi > 1:
            lumisPerJob = int(float(self.eventsPerJob) / self.eventsPerLumi)
            self.previousJobCount = self.firstLumi // lumisPerJob
            self.prodJobSplitArgs["initial_lfn_counter"] = self.previousJobCount

        return self.buildWorkload()
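
A worked example of the extension logic above: with 1000 events per job split into 200-event lumi sections, lumisPerJob is 5, so a request whose FirstLumi is 101 skips 20 jobs' worth of LFN counters (illustrative numbers only):

events_per_job = 1000
events_per_lumi = 200
first_lumi = 101

lumis_per_job = int(float(events_per_job) / events_per_lumi)  # 5
print(first_lumi // lumis_per_job)  # initial_lfn_counter of 20
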
Example #34
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a MonteCarloFromGEN workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]

        # The CouchURL and name of the ConfigCache database must be passed in
        # by the ReqMgr or whatever is creating this workflow.
        self.couchURL = arguments["CouchURL"]
        self.couchDBName = arguments["CouchDBName"]
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get(
            "DbsUrl",
            "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.emulation = arguments.get("Emulation", False)

        self.configCacheID = arguments.get("ConfigCacheID")

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.procJobSplitAlgo = arguments.get("StdJobSplitAlgo", "LumiBased")
        self.procJobSplitArgs = arguments.get("StdJobSplitArgs",
                                              {"lumis_per_job": 1})
        return self.buildWorkload()
Example #35
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a Express workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters that must be specified by the Requestor.
        self.outputs = arguments['Outputs']

        # job splitting parameters (also required parameters)
        self.expressSplitArgs = {}
        self.expressSplitArgs['maxInputRate'] = arguments['MaxInputRate']
        self.expressSplitArgs['maxInputEvents'] = arguments['MaxInputEvents']
        self.expressMergeSplitArgs = {}
        self.expressMergeSplitArgs['maxInputSize'] = arguments['MaxInputSize']
        self.expressMergeSplitArgs['maxInputFiles'] = arguments['MaxInputFiles']
        self.expressMergeSplitArgs['maxLatency'] = arguments['MaxLatency']

        # fixed parameters that are used in various places
        self.alcaHarvestOutLabel = "Sqlite"

        return self.buildWorkload()
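
A hypothetical set of Express parameters matching the keys this snippet reads; the numbers are illustrative, not CMS Tier-0 defaults:

express_args = {
    "Outputs": [],                    # output definitions go here
    "MaxInputRate": 23 * 1000,        # events/s cap, made-up value
    "MaxInputEvents": 200,            # events per express job
    "MaxInputSize": 2 * 1024 ** 3,    # ~2 GB per merge
    "MaxInputFiles": 500,
    "MaxLatency": 15 * 60,            # seconds before forcing a merge
}
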
Example #37
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for an Analysis request

        """

        StdBase.__call__(self, workloadName, arguments)

        # Parameters for users
        self.owner_vogroup = arguments.get("VoGroup", '')
        self.owner_vorole = arguments.get("VoRole", '')
        self.userSandbox = arguments.get("userSandbox", None)
        self.userFiles = arguments.get("userFiles", [])
        self.outputFiles = arguments.get("OutputFiles", [])
        self.userName = arguments.get("Username", 'jblow')
        self.saveLogs = arguments.get("SaveLogs", True)
        self.emulation = arguments.get("Emulation", False)

        # Workflow creation
        self.couchURL = arguments.get("CouchURL")
        self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
        self.configCacheID = arguments.get("AnalysisConfigCacheDoc", None)
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)

        self.minMergeSize = 1

        self.frameworkVersion = arguments["CMSSWVersion"]
        self.acquisitionEra = arguments.get("PublishDataName",
                                            str(int(time.time())))
        self.globalTag = arguments.get("GlobalTag", None)

        self.inputDataset = arguments.get('InputDataset', None)
        self.processingVersion = arguments.get('ProcessingVersion', 1)
        self.origRequest = arguments.get('OriginalRequestName', '')

        # Sites
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")

        # ACDC and job splitting
        self.ACDCURL = arguments.get("ACDCUrl", "")
        self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
        self.Runs = arguments.get("Runs", None)
        self.Lumis = arguments.get("Lumis", None)
        self.Submission = arguments.get("Submission", 1)
        self.analysisJobSplitAlgo = arguments.get("JobSplitAlgo", "EventBased")

        if self.Lumis and self.analysisJobSplitAlgo not in ['LumiBased']:
            raise RuntimeError(
                'Running on selected lumis only supported in split mode(s) %s'
                % 'LumiBased')

        if self.analysisJobSplitAlgo == 'EventBased':
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs',
                                                      {'events_per_job': 1000})
        elif self.analysisJobSplitAlgo == 'LumiBased':
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs',
                                                      {'lumis_per_job': 15})
            if self.Lumis:
                self.analysisJobSplitArgs.update({'lumis': self.Lumis})
                self.analysisJobSplitArgs.update({'runs': self.Runs})
            self.analysisJobSplitArgs.update({
                'halt_job_on_file_boundaries': False,
                'splitOnRun': False,
            })
        else:
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {})

        return self.buildWorkload()
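
For the LumiBased branch with a lumi selection, the resulting split arguments would look roughly like this (the Runs/Lumis value format is an assumption based on the snippet, not documented here):

analysis_split_args = {"lumis_per_job": 15}
analysis_split_args.update({"lumis": "100-110,150-160",  # hypothetical mask
                            "runs": "1,2"})
analysis_split_args.update({"halt_job_on_file_boundaries": False,
                            "splitOnRun": False})
print(analysis_split_args)
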
Example #38
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a StoreResults workload with the given parameters.
        """
        # first of all, we update the merged LFN based on the physics group
        arguments['MergedLFNBase'] += "/" + arguments['PhysicsGroup'].lower()
        StdBase.__call__(self, workloadName, arguments)

        (inputPrimaryDataset, inputProcessedDataset,
         inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()

        mergeTask = workload.newTask("StoreResults")
        self.addDashboardMonitoring(mergeTask)
        mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
        mergeTaskCmssw.setStepType("CMSSW")

        mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
        mergeTaskStageOut.setStepType("StageOut")

        mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
        mergeTaskLogArch.setStepType("LogArchive")

        self.addLogCollectTask(mergeTask, taskName="StoreResultsLogCollect")

        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()

        mergeTask.addInputDataset(name=self.inputDataset,
                                  primary=inputPrimaryDataset,
                                  processed=inputProcessedDataset,
                                  tier=inputDataTier,
                                  dbsurl=self.dbsUrl,
                                  block_blacklist=self.blockBlacklist,
                                  block_whitelist=self.blockWhitelist,
                                  run_blacklist=self.runBlacklist,
                                  run_whitelist=self.runWhitelist)

        splitAlgo = "ParentlessMergeBySize"
        mergeTask.setSplittingAlgorithm(splitAlgo,
                                        max_merge_size=self.maxMergeSize,
                                        min_merge_size=self.minMergeSize,
                                        max_merge_events=self.maxMergeEvents)

        mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
        mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion,
                                        softwareEnvironment="",
                                        scramArch=self.scramArch)
        mergeTaskCmsswHelper.setGlobalTag(self.globalTag)
        mergeTaskCmsswHelper.setSkipBadFiles(True)
        mergeTaskCmsswHelper.setDataProcessingConfig("do_not_use", "merge")

        self.addOutputModule(mergeTask,
                             "Merged",
                             primaryDataset=inputPrimaryDataset,
                             dataTier=self.dataTier,
                             filterName=None,
                             forceMerged=True)

        workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase)
        workload.setDashboardActivity("StoreResults")

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        workload.setTaskPropertiesFromWorkload()
        self.reportWorkflowToDashboard(workload.getDashboardActivity())

        return workload
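
The merge task is driven entirely by the three thresholds passed to ParentlessMergeBySize; an illustrative set of values (assumptions, not WMCore defaults) would be:

merge_args = {
    "max_merge_size": 4 * 1024 ** 3,    # stop adding files past ~4 GB
    "min_merge_size": 2 * 1024 ** 3,    # don't bother merging below ~2 GB
    "max_merge_events": 100000,         # cap events per merged file
}
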
Example #39
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)
        self.originalRequestName = self.initialTaskPath.split('/')[1]
        # TODO remove the None case when reqmgr is retired
        return self.buildWorkload(arguments.get("OriginalRequestCouchURL", None))
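
The original request name is simply the first element of the initial task path, e.g. (made-up path):

initial_task_path = "/OriginalRequest_v1_230101_0000/DataProcessing"
print(initial_task_path.split('/')[1])  # OriginalRequest_v1_230101_0000
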
Example #40
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a StoreResults workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        (self.inputPrimaryDataset, self.inputProcessedDataset, self.inputDataTier) = self.inputDataset[1:].split("/")

        workload = self.createWorkload()
        
        workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase)
        workload.setDashboardActivity("StoreResults")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())

        mergeTask = workload.newTask("StoreResults")
        self.addDashboardMonitoring(mergeTask)
        mergeTaskCmssw = mergeTask.makeStep("cmsRun1")
        mergeTaskCmssw.setStepType("CMSSW")

        mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
        mergeTaskStageOut.setStepType("StageOut")
        
        mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
        mergeTaskLogArch.setStepType("LogArchive")

        mergeTask.setSiteWhitelist(self.siteWhitelist)
        mergeTask.setSiteBlacklist(self.siteBlacklist)

        self.addLogCollectTask(mergeTask, taskName = "StoreResultsLogCollect")
        
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()
        
        mergeTask.addInputDataset(primary = self.inputPrimaryDataset,
                                  processed = self.inputProcessedDataset,
                                  tier = self.inputDataTier,
                                  dbsurl = self.dbsUrl,
                                  block_blacklist = self.blockBlacklist,
                                  block_whitelist = self.blockWhitelist,
                                  run_blacklist = self.runBlacklist,
                                  run_whitelist = self.runWhitelist)

        splitAlgo = "ParentlessMergeBySize"
        mergeTask.setSplittingAlgorithm(splitAlgo,
                                        max_merge_size = self.maxMergeSize,
                                        min_merge_size = self.minMergeSize,
                                        max_merge_events = self.maxMergeEvents)
        
        mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
        mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment = "",
                                        scramArch = self.scramArch)
        mergeTaskCmsswHelper.setGlobalTag(self.globalTag)
        mergeTaskCmsswHelper.setSkipBadFiles(True)
        mergeTaskCmsswHelper.setDataProcessingConfig("do_not_use", "merge")
        
        self.addOutputModule(mergeTask, "Merged",
                             primaryDataset = self.inputPrimaryDataset,
                             dataTier = self.dataTier,
                             filterName = None,
                             forceMerged = True)

        # setting the parameters which need to be set for all the tasks
        # sets acquisitionEra, processingVersion, processingString
        workload.setTaskPropertiesFromWorkload()
        
        return workload
Example #41
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()
        self.arguments = arguments
        self.couchURL = arguments['CouchURL']
        self.couchDBName = arguments['CouchDBName']
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments.get("GlobalTag", None)

        # Optional arguments that default to something reasonable.
        self.dbsUrl = arguments.get(
            "DbsUrl",
            "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.emulation = arguments.get("Emulation", False)

        numTasks = arguments['TaskChain']
        for i in range(1, numTasks + 1):
            # consistency check that there are numTasks defined in the request:
            if "Task%s" % i not in arguments:
                msg = "Specified number of tasks: %s does not match defined task dictionary for Task%s" % (
                    numTasks, i)
                raise RuntimeError(msg)

            taskConf = getTaskN(arguments, i)
            parent = parentTaskName(taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf.get("BlockBlacklist", [])
            self.blockWhitelist = taskConf.get("BlockWhitelist", [])
            self.runBlacklist = taskConf.get("RunBlacklist", [])
            self.runWhitelist = taskConf.get("RunWhitelist", [])

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(
                    taskConf)]

            task = self.makeTask(taskConf, parentTask)
            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy(
                        "MonteCarlo", taskConf['SplittingAlgorithm'],
                        taskConf['SplittingArguments'])
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy(
                        "Block", taskConf['SplittingAlgorithm'],
                        taskConf['SplittingArguments'])
                    self.setupTask(task, taskConf)
                self.reportWorkflowToDashboard(
                    self.workload.getDashboardActivity())
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf

        return self.workload
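
A hypothetical TaskChain arguments skeleton for the loop above, with two tasks (all names and splitting values are placeholders):

taskchain_args = {
    "TaskChain": 2,
    "Task1": {"SplittingAlgorithm": "EventBased",
              "SplittingArguments": {"events_per_job": 500}},
    # The key linking Task2 to its parent is resolved by parentTaskName(),
    # which is not shown in this listing, so it is omitted here.
    "Task2": {"SplittingAlgorithm": "FileBased",
              "SplittingArguments": {"files_per_job": 1}},
}
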
Example #42
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Detect blow-up factor from first task in chain.
        blowupFactor = 1
        if (self.taskChain > 1) and 'TimePerEvent' in arguments["Task1"]:
            origTpe = arguments["Task1"]['TimePerEvent']
            if origTpe <= 0:
                origTpe = 1.0
            sumTpe = 0
            tpeCount = 0
            for i in range(1, self.taskChain + 1):
                if 'TimePerEvent' in arguments["Task%d" % i]:
                    sumTpe += arguments["Task%d" % i]['TimePerEvent']
                    tpeCount += 1
            if tpeCount > 0:
                blowupFactor = sumTpe / origTpe

        for i in range(1, self.taskChain + 1):

            originalTaskConf = arguments["Task%d" % i]
            taskConf = {}
            # Make a shallow copy of the taskConf
            for k, v in originalTaskConf.items():
                taskConf[k] = v
            parent = taskConf.get("InputTask", None)

            self.modifyTaskConfiguration(taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf["BlockBlacklist"]
            self.blockWhitelist = taskConf["BlockWhitelist"]
            self.runBlacklist = taskConf["RunBlacklist"]
            self.runWhitelist = taskConf["RunWhitelist"]

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(taskConf)]

            task = self.makeTask(taskConf, parentTask)

            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy("MonteCarlo", taskConf['SplittingAlgo'],
                                                          taskConf['SplittingArguments'],
                                                          blowupFactor=blowupFactor)
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy("Block", taskConf['SplittingAlgo'],
                                                          taskConf['SplittingArguments'],
                                                          blowupFactor=blowupFactor)
                    self.setupTask(task, taskConf)
                self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf

        self.workload.ignoreOutputModules(self.ignoredOutputModules)

        return self.workload
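
Worked example of the blow-up factor: if Task1 costs 10 s/event and the three tasks in the chain cost 10, 5 and 2 s/event, the whole chain costs 1.7x the first task alone (illustrative numbers):

times_per_event = [10.0, 5.0, 2.0]  # Task1..Task3, made-up values
print(sum(times_per_event) / times_per_event[0])  # blowupFactor = 1.7
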
Example #43
File: Resubmission.py Project: dmwm/WMCore
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)
        self.originalRequestName = self.initialTaskPath.split('/')[1]
        return self.buildWorkload(arguments)
Example #44
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for an Analysis request
        """

        StdBase.__call__(self, workloadName, arguments)

        self.globalTag = arguments.get("GlobalTag", None)

        # Required parameters.
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.inputDataset = arguments["InputDataset"]
        self.processingVersion = arguments.get("ProcessingVersion", "v1")
        self.origRequest = arguments.get("OriginalRequestName", "")
        self.emulation = arguments.get("Emulation", False)

        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])

        self.couchURL = arguments.get("CouchUrl")
        self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
        self.analysisConfigCacheID = arguments.get("AnalysisConfigCacheDoc", None)
        self.ACDCURL = arguments.get("ACDCUrl", "")
        self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
        self.ACDCID = arguments.get("ACDCDoc", None)

        # These are mostly place holders because the job splitting algo and
        # parameters will be updated after the workflow has been created.
        self.analysisJobSplitAlgo = arguments.get("JobSplitAlgo", "EventBased")

        if self.ACDCID and self.analysisJobSplitAlgo not in ["LumiBased"]:
            raise RuntimeError("Running on selected lumis only supported in split mode(s) %s" % "LumiBased")

        if self.analysisJobSplitAlgo == "EventBased":
            self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {"events_per_job": 1000})
        elif self.analysisJobSplitAlgo == "LumiBased":
            self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {"lumis_per_job": 15})
            if self.ACDCID:
                self.analysisJobSplitArgs.update(
                    {
                        "filesetName": self.ACDCID,
                        "collectionName": self.origRequest,
                        "couchURL": self.ACDCURL,
                        "couchDB": self.ACDCDBName,
                        "owner": self.owner,
                        "group": self.group,
                    }
                )
            self.analysisJobSplitArgs.update({"halt_job_on_file_boundaries": False, "splitOnRun": False})
        else:
            self.analysisJobSplitArgs = arguments.get("JobSplitArgs", {})

        self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")
        self.minMergeSize = 1  # arguments.get("MinMergeSize", 1)
        self.acquisitionEra = arguments.get("PublishDataName", str(int(time.time())))
        self.owner_vogroup = arguments.get("VoGroup", "")
        self.owner_vorole = arguments.get("VoRole", "")
        self.userSandbox = arguments.get("userSandbox", None)
        self.userFiles = arguments.get("userFiles", [])
        self.userName = arguments.get("Username", "jblow")
        self.saveLogs = arguments.get("SaveLogs", True)
        self.outputFiles = arguments.get("OutputFiles", [])

        return self.buildWorkload()
Example #45
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a StoreResults workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]
        self.cmsPath = arguments["CmsPath"]

        # Optional arguments.
        self.dbsUrl = arguments.get(
            "DbsUrl",
            "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlackList = arguments.get("BlockBlackList", [])
        self.blockWhiteList = arguments.get("BlockWhiteList", [])
        self.runBlackList = arguments.get("RunBlackList", [])
        self.runWhiteList = arguments.get("RunWhiteList", [])
        self.emulation = arguments.get("Emulation", False)
        self.stdJobSplitAlgo = arguments.get("StdJobSplitAlgo", 'FileBased')
        self.stdJobSplitArgs = arguments.get("StdJobSplitArgs",
                                             {'files_per_job': 1})
        self.dataTier = arguments.get("DataTier", 'USER')
        self.configCacheUrl = arguments.get("ConfigCacheUrl", None)
        dataTier = self.dataTier

        (self.inputPrimaryDataset, self.inputProcessedDataset, self.inputDataTier) = \
                                   self.inputDataset[1:].split("/")

        processedDatasetName = "%s-%s" % (self.acquisitionEra,
                                          self.processingVersion)

        workload = self.createWorkload()
        workload.setDashboardActivity("StoreResults")
        self.reportWorkflowToDashboard(workload.getDashboardActivity())

        mergeTask = workload.newTask("StoreResults")
        self.addDashboardMonitoring(mergeTask)
        mergeTaskCmssw = mergeTask.makeStep("cmsRun1")

        mergeTaskCmssw.setStepType("CMSSW")

        mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
        mergeTaskStageOut.setStepType("StageOut")
        mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
        mergeTaskLogArch.setStepType("LogArchive")
        self.addLogCollectTask(mergeTask, taskName="StoreResultsLogCollect")
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()
        mergeTask.addInputDataset(primary=self.inputPrimaryDataset,
                                  processed=self.inputProcessedDataset,
                                  tier=self.inputDataTier,
                                  dbsurl=self.dbsUrl,
                                  block_blacklist=self.blockBlackList,
                                  block_whitelist=self.blockWhiteList,
                                  run_blacklist=self.runBlackList,
                                  run_whitelist=self.runWhiteList)
        splitAlgo = "ParentlessMergeBySize"
        mergeTask.setSplittingAlgorithm(splitAlgo,
                                        max_merge_size=self.maxMergeSize,
                                        min_merge_size=self.minMergeSize,
                                        max_merge_events=self.maxMergeEvents,
                                        siteWhitelist=self.siteWhitelist,
                                        siteBlacklist=self.siteBlacklist)

        mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
        mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion,
                                        softwareEnvironment="",
                                        scramArch=self.scramArch)
        mergeTaskCmsswHelper.setDataProcessingConfig("cosmics", "merge")

        mergedLFN = "%s/%s/%s/%s/%s" % (
            self.mergedLFNBase, self.acquisitionEra, self.inputPrimaryDataset,
            dataTier, self.processingVersion)

        mergeTaskCmsswHelper.addOutputModule(
            "Merged",
            primaryDataset=self.inputPrimaryDataset,
            processedDataset=processedDatasetName,
            dataTier=dataTier,
            lfnBase=mergedLFN)

        return workload
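
The merged LFN assembled above is a five-level path; with made-up values it renders as:

merged_lfn_base = "/store/results/examplegroup"  # hypothetical base
print("%s/%s/%s/%s/%s" % (merged_lfn_base, "Acq2012A",
                          "ExamplePrimary", "USER", "v1"))
# /store/results/examplegroup/Acq2012A/ExamplePrimary/USER/v1
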
Example #46
    def __call__(self, workloadName, arguments):
        """
        Create a workload instance for an Analysis request

        """

        StdBase.__call__(self, workloadName, arguments)

        # Parameters for users
        self.owner_vogroup = arguments.get("VoGroup", '')
        self.owner_vorole = arguments.get("VoRole", '')
        self.userSandbox = arguments.get("userSandbox", None)
        self.userFiles = arguments.get("userFiles", [])
        self.outputFiles = arguments.get("OutputFiles", [])
        self.userName = arguments.get("Username",'jblow')
        self.saveLogs = arguments.get("SaveLogs", True)
        self.emulation = arguments.get("Emulation", False)

        # Workflow creation
        self.couchURL = arguments.get("CouchURL")
        self.couchDBName = arguments.get("CouchDBName", "wmagent_configcache")
        self.minMergeSize = 1

        self.configCacheID = arguments.get("AnalysisConfigCacheDoc", None)
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.acquisitionEra = arguments.get("PublishDataName", str(int(time.time())))
        self.globalTag = arguments.get("GlobalTag", None)

        self.inputDataset = arguments.get('InputDataset', None)
        self.processingVersion = arguments.get('ProcessingVersion', '1')
        self.origRequest = arguments.get('OriginalRequestName', '')

        # Sites
        self.blockBlacklist = arguments.get("BlockBlacklist", [])
        self.blockWhitelist = arguments.get("BlockWhitelist", [])
        self.runWhitelist = arguments.get("RunWhitelist", [])
        self.runBlacklist = arguments.get("RunBlacklist", [])
        self.asyncDest = arguments.get("asyncDest", "T1_US_FNAL_Buffer")

        # ACDC and job splitting
        self.ACDCURL = arguments.get("ACDCUrl", "")
        self.ACDCDBName = arguments.get("ACDCDBName", "wmagent_acdc")
        self.ACDCID = arguments.get("ACDCDoc", None)
        self.analysisJobSplitAlgo  = arguments.get("JobSplitAlgo", "EventBased")

        if self.ACDCID and self.analysisJobSplitAlgo not in ['LumiBased']:
            raise RuntimeError('Running on selected lumis only supported in split mode(s) %s' %
                               'LumiBased')

        if self.analysisJobSplitAlgo == 'EventBased':
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'events_per_job' : 1000})
        elif self.analysisJobSplitAlgo == 'LumiBased':
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {'lumis_per_job' : 15})
            if self.ACDCID:
                self.analysisJobSplitArgs.update(
                            {'filesetName' : self.ACDCID,
                             'collectionName' : self.origRequest,
                             'couchURL' : self.ACDCURL,
                             'couchDB' : self.ACDCDBName,
                             'owner' : self.owner,
                             'group' : self.group,
                            })
            self.analysisJobSplitArgs.update(
                           {'halt_job_on_file_boundaries' : False,
                            'splitOnRun' : False,
                           })
        else:
            self.analysisJobSplitArgs = arguments.get('JobSplitArgs', {})

        return self.buildWorkload()
Example #47
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a ReReco workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)
        self.workload = self.createWorkload()

        # Detect blow-up factor from first task in chain.
        blowupFactor = 1
        if (self.taskChain > 1) and 'TimePerEvent' in arguments["Task1"]:
            origTpe = arguments["Task1"]['TimePerEvent']
            if origTpe <= 0:
                origTpe = 1.0
            sumTpe = 0
            tpeCount = 0
            for i in range(1, self.taskChain + 1):
                if 'TimePerEvent' in arguments["Task%d" % i]:
                    sumTpe += arguments["Task%d" % i]['TimePerEvent']
                    tpeCount += 1
            if tpeCount > 0:
                blowupFactor = sumTpe / origTpe

        for i in range(1, self.taskChain + 1):

            originalTaskConf = arguments["Task%d" % i]
            taskConf = {}
            # Make a shallow copy of the taskConf
            for k, v in originalTaskConf.items():
                taskConf[k] = v
            parent = taskConf.get("InputTask", None)

            self.modifyTaskConfiguration(
                taskConf, i == 1, i == 1 and 'InputDataset' not in taskConf)

            # Set task-specific global parameters
            self.blockBlacklist = taskConf["BlockBlacklist"]
            self.blockWhitelist = taskConf["BlockWhitelist"]
            self.runBlacklist = taskConf["RunBlacklist"]
            self.runWhitelist = taskConf["RunWhitelist"]

            parentTask = None
            if parent in self.mergeMapping:
                parentTask = self.mergeMapping[parent][parentTaskModule(
                    taskConf)]

            task = self.makeTask(taskConf, parentTask)

            if i == 1:
                # First task will either be generator or processing
                self.workload.setDashboardActivity("relval")
                if isGenerator(arguments):
                    # generate mc events
                    self.workload.setWorkQueueSplitPolicy(
                        "MonteCarlo",
                        taskConf['SplittingAlgo'],
                        taskConf['SplittingArguments'],
                        blowupFactor=blowupFactor)
                    self.workload.setEndPolicy("SingleShot")
                    self.setupGeneratorTask(task, taskConf)
                else:
                    # process an existing dataset
                    self.workload.setWorkQueueSplitPolicy(
                        "Block",
                        taskConf['SplittingAlgo'],
                        taskConf['SplittingArguments'],
                        blowupFactor=blowupFactor)
                    self.setupTask(task, taskConf)
            else:
                # all subsequent tasks have to be processing tasks
                self.setupTask(task, taskConf)
            self.taskMapping[task.name()] = taskConf

        # now that all tasks have been created, create the parent x output dataset map
        self.createTaskParentageMapping(arguments)
        self.workload.setTaskParentageMapping(self.taskOutputMapping)

        self.workload.ignoreOutputModules(self.ignoredOutputModules)
        self.reportWorkflowToDashboard(self.workload.getDashboardActivity())
        # and push the parentage map to the reqmgr2 workload cache doc
        arguments['ChainParentageMap'] = self.workload.getChainParentageSimpleMapping()

        # Feed values back to save in couch
        if self.eventsPerJob:
            arguments['Task1']['EventsPerJob'] = self.eventsPerJob
        if self.eventsPerLumi:
            arguments['Task1']['EventsPerLumi'] = self.eventsPerLumi
        return self.workload
Example #48
    def __call__(self, workloadName, arguments):
        StdBase.__call__(self, workloadName, arguments)
        self.originalRequestName = self.initialTaskPath.split('/')[1]
        return self.buildWorkload(arguments)
Example #49
    def __call__(self, workloadName, arguments):
        """
        _call_

        Create a StoreResults workload with the given parameters.
        """
        StdBase.__call__(self, workloadName, arguments)

        # Required parameters.
        self.inputDataset = arguments["InputDataset"]
        self.frameworkVersion = arguments["CMSSWVersion"]
        self.globalTag = arguments["GlobalTag"]
        self.cmsPath = arguments["CmsPath"]

        # Required parameters that can be empty.
        self.scenario = arguments["Scenario"]

        # Optional arguments.
        self.dbsUrl = arguments.get("DbsUrl", "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.blockBlackList = arguments.get("BlockBlackList", [])
        self.blockWhiteList = arguments.get("BlockWhiteList", [])
        self.runBlackList = arguments.get("RunBlackList", [])
        self.runWhiteList = arguments.get("RunWhiteList", [])
        self.emulation = arguments.get("Emulation", False)
        self.stdJobSplitAlgo  = arguments.get("StdJobSplitAlgo", 'FileBased')
        self.stdJobSplitArgs  = arguments.get("StdJobSplitArgs", {'files_per_job': 1})
        self.dataTier         = arguments.get("DataTier", 'USER')
        dataTier = self.dataTier

        (self.inputPrimaryDataset, self.inputProcessedDataset, self.inputDataTier) = \
                                   self.inputDataset[1:].split("/")

        processedDatasetName = "%s-%s" % (self.acquisitionEra, self.processingVersion)

        workload = self.createWorkload()
        mergeTask = workload.newTask("StoreResults")

        self.addDashboardMonitoring(mergeTask)
        mergeTaskCmssw = mergeTask.makeStep("cmsRun1")

        mergeTaskCmssw.setStepType("CMSSW")

        mergeTaskStageOut = mergeTaskCmssw.addStep("stageOut1")
        mergeTaskStageOut.setStepType("StageOut")
        mergeTaskLogArch = mergeTaskCmssw.addStep("logArch1")
        mergeTaskLogArch.setStepType("LogArchive")
        self.addLogCollectTask(mergeTask, taskName = "StoreResultsLogCollect")
        mergeTask.addGenerator("BasicNaming")
        mergeTask.addGenerator("BasicCounter")
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()
        mergeTask.addInputDataset(primary = self.inputPrimaryDataset, processed = self.inputProcessedDataset,
                                     tier = self.inputDataTier, dbsurl = self.dbsUrl,
                                     block_blacklist = self.blockBlackList,
                                     block_whitelist = self.blockWhiteList,
                                     run_blacklist = self.runBlackList,
                                     run_whitelist = self.runWhiteList)
        splitAlgo = "ParentlessMergeBySize"
        mergeTask.setSplittingAlgorithm(splitAlgo,
                                        max_merge_size = self.maxMergeSize,
                                        min_merge_size = self.minMergeSize,
                                        max_merge_events = self.maxMergeEvents,
                                        siteWhitelist = self.siteWhitelist,
                                        siteBlacklist = self.siteBlacklist)

        mergeTaskCmsswHelper = mergeTaskCmssw.getTypeHelper()
        mergeTaskCmsswHelper.cmsswSetup(self.frameworkVersion, softwareEnvironment = "",
                                        scramArch = self.scramArch)
        mergeTaskCmsswHelper.setDataProcessingConfig("cosmics", "merge")

        mergedLFN = "%s/%s/%s/%s/%s" % (self.mergedLFNBase, self.acquisitionEra,
                                        self.inputPrimaryDataset, dataTier,
                                        self.processingVersion)

        mergeTaskCmsswHelper.addOutputModule("Merged",
                                             primaryDataset = self.inputPrimaryDataset,
                                             processedDataset = processedDatasetName,
                                             dataTier = dataTier,
                                             lfnBase = mergedLFN)

        return workload