Example #1
    def testFirstEvent(self):
        """
        _testFirstEvent_

        Verify that we set the firstEvent parameter whenever the FirstEvent
        field in the job mask is a positive integer and the job is not a
        production one.
        """
        job = Job()
        job["input_files"] = [{"lfn": "bogusFile", "parents": []}]
        job["mask"] = Mask()

        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertFalse(hasattr(tweak.process.source, "skipEvents"),
                         "Error: There should be no skipEvents tweak.")
        self.assertFalse(hasattr(tweak.process.source, "firstEvent"),
                         "Error: There should be no firstEvent tweak.")

        job["mask"]["FirstEvent"] = 0
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "skipEvents"),
                        "Error: There should be a skipEvents tweak.")
        self.assertEqual(tweak.process.source.skipEvents,
                         'customTypeCms.untracked.uint32(0)',
                         "Error: The skipEvents tweak should be 0.")
        return
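The asserts above rely on PSetTweak exposing parameters as nested attributes under tweak.process, with values stored as string-encoded cms types. Below is a minimal stand-in illustrating that dotted-path convention; this is not WMCore's implementation, just a sketch (MiniTweak and _Node are invented names):

class _Node(object):
    """Bare attribute container."""
    pass

class MiniTweak(object):
    """Toy tweak: resolves 'process.a.b' into nested attributes."""
    def __init__(self):
        self.process = _Node()

    def addParameter(self, path, value):
        parts = path.split(".")
        assert parts[0] == "process"
        node = self.process
        for part in parts[1:-1]:
            node = node.__dict__.setdefault(part, _Node())
        setattr(node, parts[-1], value)

tweak = MiniTweak()
tweak.addParameter("process.source.skipEvents", "customTypeCms.untracked.uint32(0)")
assert hasattr(tweak.process.source, "skipEvents")
assert tweak.process.source.skipEvents == "customTypeCms.untracked.uint32(0)"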
Example #2
 def handleSeeding(self):
     """
     _handleSeeding_
     
     Handle Random Seed settings for the job
     
     """
     baggage = self.job.getBaggage()
     seeding = getattr(baggage, "seeding", None)
     if seeding is None:
         return
     if seeding == "AutomaticSeeding":
         from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
         helper = RandomNumberServiceHelper(self.process.RandomNumberGeneratorService)
         helper.populate()
         return
     if seeding == "ReproducibleSeeding":
         randService = self.process.RandomNumberGeneratorService
         tweak = PSetTweak()
         for x in randService:
             parameter = "process.RandomNumberGeneratorService.%s.initialSeed" % x._internal_name
             tweak.addParameter(parameter, x.initialSeed)
         applyTweak(self.process, tweak, self.fixupDict)
         return
     # still here means bad seeding algo name
     raise RuntimeError("Bad Seeding Algorithm: %s" % seeding)
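The ReproducibleSeeding branch walks every engine in the RandomNumberGeneratorService and pins its initialSeed. A hedged sketch of that mapping, with fake engine objects standing in for the real PSet entries (only _internal_name and initialSeed, the two attributes the loop reads, are mimicked):

class FakeEngine(object):
    """Stand-in for one engine PSet inside RandomNumberGeneratorService."""
    def __init__(self, name, seed):
        self._internal_name = name
        self.initialSeed = seed

randService = [FakeEngine("generator", 12345), FakeEngine("mix", 67890)]
seedParams = {}
for x in randService:
    parameter = "process.RandomNumberGeneratorService.%s.initialSeed" % x._internal_name
    seedParams[parameter] = x.initialSeed
print(seedParams)
# {'process.RandomNumberGeneratorService.generator.initialSeed': 12345,
#  'process.RandomNumberGeneratorService.mix.initialSeed': 67890}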
Example #3
    def makeThreadsStreamsTweak(self):
        """
        _makeThreadsStreamsTweak_

        Tweak threads and streams parameters
        """
        origCores = int(
            getattr(self.step.data.application.multicore, 'numberOfCores', 1))
        eventStreams = int(
            getattr(self.step.data.application.multicore, 'eventStreams', 0))
        resources = {'cores': origCores}
        resizeResources(resources)
        numCores = resources['cores']
        if numCores != origCores:
            self.logger.info(
                "Resizing a job with nStreams != nCores. Setting nStreams = nCores. This may end badly."
            )
            eventStreams = 0

        tweak = PSetTweak()
        tweak.addParameter("process.options", "customTypeCms.untracked.PSet()")
        self.applyPsetTweak(tweak, skipIfSet=True)
        self.tweak.addParameter(
            "process.options.numberOfThreads",
            "customTypeCms.untracked.uint32(%s)" % numCores)
        self.tweak.addParameter(
            "process.options.numberOfStreams",
            "customTypeCms.untracked.uint32(%s)" % eventStreams)

        return
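For reference, a small sketch of the two string-encoded parameters this tweak ends up carrying, using sample values (a job resized to 4 cores with streams reset to 0):

numCores, eventStreams = 4, 0  # illustrative values only
threadsParam = "customTypeCms.untracked.uint32(%s)" % numCores
streamsParam = "customTypeCms.untracked.uint32(%s)" % eventStreams
print(threadsParam)  # customTypeCms.untracked.uint32(4)
print(streamsParam)  # customTypeCms.untracked.uint32(0)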
Example #4
    def testFirstEventMC(self):
        """
        _testFirstEventMC_

        Verify that we set the firstEvent parameter whenever the FirstEvent
        field in the job mask is a positive integer and the job is a production
        one. Otherwise we get a number based on the counter (modulo 2^32 - 1)
        """
        job = Job()
        job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
        job["mask"] = Mask()
        job["counter"] = 5
        job["mask"]["FirstLumi"] = 200

        tweak = PSetTweak()
        self.assertRaises(WMTweakMaskError, WMTweaks.makeJobTweak, job, tweak)

        job["mask"]["FirstEvent"] = 100
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)
        self.assertFalse(
            hasattr(tweak.process.source, "skipEvents"),
            "Error: There should be no skipEvents tweak, it's MC.")
        self.assertTrue(hasattr(tweak.process.source, "firstEvent"),
                        "Error: There should be a first event tweak")
        self.assertEqual(tweak.process.source.firstEvent,
                         'customTypeCms.untracked.uint32(100)',
                         "Error: The firstEvent tweak should be 100.")
        return
Example #5
    def testFirstRunMC(self):
        """
        _testFirstRunMC_
        Verify that process.source.firstRun defaults to 1 for a MC job when
        no FirstRun is present in the mask, and that a FirstRun set in the
        mask ends up in the process.source.firstRun parameter.
        """
        job = Job()
        job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
        job["mask"] = Mask()
        job["mask"]["FirstLumi"] = 200
        job["mask"]["FirstEvent"] = 100
        job["counter"] = 5

        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                        "Error: There should be a first run tweak")
        self.assertEqual(tweak.process.source.firstRun,
                         'customTypeCms.untracked.uint32(1)',
                         "Error: The first run should be 1")

        job["mask"]["FirstRun"] = 5
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                        "Error: There should be a first run tweak")
        self.assertEqual(tweak.process.source.firstRun,
                         'customTypeCms.untracked.uint32(5)',
                         "Error: The first run should be 5")
Example #6
    def handlePerformanceSettings(self):
        """
        _handlePerformanceSettings_

        Install the standard performance report services
        """
        tweak = PSetTweak()
        # include the default performance report services
        if getattr(self.step.data.application.command, 'silentMemoryCheck',
                   False):
            tweak.addParameter(
                "process.SimpleMemoryCheck",
                "customTypeCms.Service('SimpleMemoryCheck', jobReportOutputOnly=cms.untracked.bool(True))"
            )
        else:
            tweak.addParameter("process.SimpleMemoryCheck",
                               "customTypeCms.Service('SimpleMemoryCheck')")

        tweak.addParameter("process.CPU", "customTypeCms.Service('CPU')")
        tweak.addParameter("process.Timing", "customTypeCms.Service('Timing')")
        self.applyPsetTweak(tweak)
        self.tweak.addParameter("process.Timing.summaryOnly",
                                "customTypeCms.untracked(cms.bool(True))")

        return
Example #7
    def applyTweak(self, psetTweak):
        """
        _applyTweak_

        Apply a tweak to the process.
        """
        tweak = PSetTweak()
        tweak.unpersist(psetTweak)
        applyTweak(self.process, tweak, self.fixupDict)
        return
Example #8
    def applyTweak(self, psetTweak):
        """
        _applyTweak_

        Apply a tweak to the process.
        """
        tweak = PSetTweak()
        tweak.unpersist(psetTweak)
        applyTweak(self.process, tweak, self.fixupDict)
        return
Example #9
 def __init__(self, crabPSet=False):
     ScriptInterface.__init__(self)
     self.crabPSet = crabPSet
     self.process = None
     self.jobBag = None
     self.logger = logging.getLogger()
     self.tweak = PSetTweak()
     self.scram = None
     self.configPickle = "Pset.pkl"
     self.psetFile = None
Example #10
    def __call__(self, process):
        tweak = PSetTweak()
        # handle process parameters
        processParams = []
        for param in self.processLevel:
            processParams.extend(expandParameter(process, param).keys())

        for param in processParams:
            if hasParameter(process, param):
                tweak.addParameter(param, getParameter(process, param))

        # output modules
        tweak.addParameter('process.outputModules_', [])
        for outMod in process.outputModules_():
            tweak.getParameter('process.outputModules_').append(outMod)
            outModRef = getattr(process, outMod)
            for param in self.outModLevel:
                fullParam = "process.%s.%s" % (outMod, param)
                if hasParameter(outModRef, param, True):
                    tweak.addParameter(fullParam,
                                       getParameter(outModRef, param, True))

        return tweak
Example #11
    def __call__(self, process):
        tweak = PSetTweak()
        # handle process parameters
        processParams = []
        for param in self.processLevel:
            processParams.extend(expandParameter(process, param).keys())

        for param in processParams:
            if hasParameter(process, param):
                tweak.addParameter(param, getParameter(process, param))

        # output modules
        tweak.addParameter('process.outputModules_', [])
        for outMod in process.outputModules_():
            tweak.getParameter('process.outputModules_').append(outMod)
            outModRef = getattr(process, outMod)
            for param in self.outModLevel:
                fullParam = "process.%s.%s" % (outMod, param)
                if hasParameter(outModRef, param, True):
                    tweak.addParameter(fullParam,
                                       getParameter(outModRef, param, True))

        return tweak
Example #12
    def testFirstRun(self):
        """
        _testFirstRun_
        Verify that when we set the FirstRun in the mask, it is set in the
        process, but when it is not, no firstRun appears in the process.
        This is for jobs with real input files.
        """
        job = Job()
        job["input_files"] = [{"lfn": "bogusFile", "parents": []}]
        job["mask"] = Mask()

        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertFalse(hasattr(tweak.process.source, "firstRun"),
                         "Error: There should be no firstRun tweak.")

        job["mask"]["FirstRun"] = 93
        tweak = WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "firstRun"),
                        "Error: There should be a firstRun tweak.")
        self.assertEqual(tweak.process.source.firstRun,
                         'customTypeCms.untracked.uint32(93)',
                         "Error: The firstRun tweak should be 93.")
        return
Example #13
    def applyPsetTweak(self,
                       psetTweak,
                       skipIfSet=False,
                       allowFailedTweaks=False,
                       name='',
                       cleanupTweak=False):
        procScript = "edm_pset_tweak.py"
        psetTweakJson = os.path.join(self.stepSpace.location,
                                     "PSetTweak%s.json" % name)
        psetTweak.persist(psetTweakJson, formatting='simplejson')

        cmd = "%s --input_pkl %s --output_pkl %s --json %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location,
                         self.configPickle), psetTweakJson)
        if skipIfSet:
            cmd += " --skip_if_set"
        if allowFailedTweaks:
            cmd += " --allow_failed_tweaks"
        self.scramRun(cmd)

        if cleanupTweak:
            # start a fresh tweak so subsequent calls begin from a clean slate
            self.tweak = PSetTweak()

        return
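For a concrete picture, here is the kind of shell command this method assembles before handing it to scramRun. The paths are hypothetical; the flags match the branches above:

import os

stepLocation = "/tmp/jobstep"   # hypothetical stepSpace.location
configPickle = "Pset.pkl"
psetTweakJson = os.path.join(stepLocation, "PSetTweakDemo.json")

cmd = "edm_pset_tweak.py --input_pkl %s --output_pkl %s --json %s" % (
    os.path.join(stepLocation, configPickle),
    os.path.join(stepLocation, configPickle),
    psetTweakJson)
cmd += " --skip_if_set"          # only when skipIfSet=True
cmd += " --allow_failed_tweaks"  # only when allowFailedTweaks=True
print(cmd)
# edm_pset_tweak.py --input_pkl /tmp/jobstep/Pset.pkl --output_pkl /tmp/jobstep/Pset.pkl
#     --json /tmp/jobstep/PSetTweakDemo.json --skip_if_set --allow_failed_tweaks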
Example #14
def makeTaskTweak(stepSection):
    """
    _makeTaskTweak_

    Create a tweak for options in the task that apply to all jobs.
    """
    result = PSetTweak()

    # GlobalTag
    if hasattr(stepSection, "application"):
        if hasattr(stepSection.application, "configuration"):
            if hasattr(stepSection.application.configuration, "arguments"):
                globalTag = getattr(stepSection.application.configuration.arguments,
                                    "globalTag", None)
                if globalTag is not None:
                    result.addParameter("process.GlobalTag.globaltag", globalTag)

    return result
Example #15
    def testC(self):
        """test building a tweak from the seeds"""
        job = Job("TestJob")
        seeder = AutomaticSeeding()

        job.addBaggageParameter("process.RandomNumberGeneratorService.seed1.initialSeed", 123445)
        job.addBaggageParameter("process.RandomNumberGeneratorService.seed2.initialSeed", 123445)
        job.addBaggageParameter("process.RandomNumberGeneratorService.seed3.initialSeed", 7464738)
        job.addBaggageParameter("process.RandomNumberGeneratorService.seed44.initialSeed", 98273762)


        seeder(job)

        tweak = PSetTweak()
        for x in job.baggage.process.RandomNumberGeneratorService:
            parameter = "process.RandomNumberGeneratorService.%s.initialSeed" % x._internal_name
            tweak.addParameter(parameter, x.initialSeed)
        print(tweak)
Example #16
def makeTaskTweak(stepSection):
    """
    _makeTaskTweak_

    Create a tweak for options in the task that apply to all jobs.
    """
    result = PSetTweak()

    # GlobalTag
    if hasattr(stepSection, "application"):
        if hasattr(stepSection.application, "configuration"):
            if hasattr(stepSection.application.configuration, "pickledarguments"):
                args = pickle.loads(stepSection.application.configuration.pickledarguments)
                if 'globalTag' in args:
                    result.addParameter("process.GlobalTag.globaltag", args['globalTag'])
                if 'globalTagTransaction' in args:
                    result.addParameter("process.GlobalTag.DBParameters.transactionId", args['globalTagTransaction'])

    return result
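This variant reads the step's pickled argument dictionary. A small round-trip sketch of the payload shape it expects (the tag and transaction values are made up for illustration):

import pickle

args = {"globalTag": "SampleTag_v1", "globalTagTransaction": "tx-0001"}  # sample values
pickledarguments = pickle.dumps(args)

restored = pickle.loads(pickledarguments)
assert 'globalTag' in restored
assert restored['globalTagTransaction'] == "tx-0001"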
Example #17
    def fixupProcess(self):
        """
        _fixupProcess_
        Look over the process object and make sure that all of the attributes
        that we expect to exist actually exist.
        """
        # Make sure that for each output module the following parameters exist
        # in the PSet returned from the framework:
        #   fileName
        #   logicalFileName
        #   dataset.dataTier
        #   dataset.filterName
        if hasattr(self.process, "outputModules"):
            outputModuleNames = list(self.process.outputModules)
        elif hasattr(self.process, "outputModules_"):
            outputModuleNames = self.process.outputModules_()
        elif hasattr(self.process, "_Process__outputmodules"):
            outputModuleNames = list(self.process._Process__outputmodules)
        else:
            msg = "Error loading output modules from process"
            raise AttributeError(msg)

        for outMod in outputModuleNames:
            tweak = PSetTweak()
            self.logger.info("DEBUG output module = %s", outMod)
            tweak.addParameter("process.options",
                               "customTypeCms.untracked.PSet()")
            tweak.addParameter(
                "process.%s.dataset" % outMod,
                "customTypeCms.untracked.PSet(dataTier=cms.untracked.string(''), filterName=cms.untracked.string(''))"
            )
            self.applyPsetTweak(tweak, skipIfSet=True, cleanupTweak=True)
            #tweak.addParameter("process.%s.dataset.dataTier" % outMod, "customTypeCms.untracked.string('')")
            #tweak.addParameter("process.%s.dataset.filterName" % outMod, "customTypeCms.untracked.string('')")
            tweak.addParameter("process.%s.fileName" % outMod,
                               "customTypeCms.untracked.string('')")
            tweak.addParameter("process.%s.logicalFileName" % outMod,
                               "customTypeCms.untracked.string('')")
            self.applyPsetTweak(tweak, skipIfSet=True)

        return
Example #18
    def handleSeeding(self):
        """
        _handleSeeding_

        Handle Random Seed settings for the job
        """
        baggage = self.job.getBaggage()
        seeding = getattr(baggage, "seeding", None)
        if seeding == "ReproducibleSeeding":
            randService = self.process.RandomNumberGeneratorService
            tweak = PSetTweak()
            for x in randService:
                parameter = "process.RandomNumberGeneratorService.%s.initialSeed" % x._internal_name
                tweak.addParameter(parameter, x.initialSeed)
            applyTweak(self.process, tweak, self.fixupDict)
        else:
            if hasattr(self.process, "RandomNumberGeneratorService"):
                from IOMC.RandomEngine.RandomServiceHelper import RandomNumberServiceHelper
                helper = RandomNumberServiceHelper(self.process.RandomNumberGeneratorService)
                helper.populate()
        return
Example #19
    def testFirstLumiMC(self):
        """
        _testFirstLumiMC_
        Verify that we set the lumi in a MC job and it gets into
        process.source.firstRun parameter, and if we don't at least we
        get the counter there.
        """
        job = Job()
        job["input_files"] = [{"lfn": "MCFakeFile", "parents": []}]
        job["mask"] = Mask()
        job["counter"] = 5
        job["mask"]["FirstEvent"] = 100

        tweak = PSetTweak()
        self.assertRaises(WMTweakMaskError, WMTweaks.makeJobTweak, job, tweak)

        job["mask"]["FirstLumi"] = 200
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                        "Error: There should be a first lumi tweak")
        self.assertEqual(tweak.process.source.firstLuminosityBlock,
                         'customTypeCms.untracked.uint32(200)',
                         "Error: The first luminosity block should be 200")

        job["mask"]["FirstLumi"] = 10
        tweak = PSetTweak()
        WMTweaks.makeJobTweak(job, tweak)

        self.assertTrue(hasattr(tweak.process.source, "firstLuminosityBlock"),
                        "Error: There should be a first lumi tweak")
        self.assertEqual(tweak.process.source.firstLuminosityBlock,
                         'customTypeCms.untracked.uint32(10)',
                         "Error: The first luminosity block should be 10")
Example #20
def makeOutputTweak(outMod, job):
    """
    _makeOutputTweak_

    Make a PSetTweak for the output module and job instance provided

    """
    result = PSetTweak()
    # output filenames
    modName = str(getattr(outMod, "_internal_name"))
    fileName = "%s.root" % modName

    result.addParameter("process.%s.fileName" % modName, fileName)

    lfnBase = getattr(outMod, "lfnBase", None)
    if lfnBase is not None:
        lfn = "%s/%s/%s.root" % (lfnBase, lfnGroup(job), modName)
        result.addParameter("process.%s.logicalFileName" % modName, lfn)

    # TODO: Nice standard way to meddle with the other parameters in the
    #      output module based on the settings in the section

    return result
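The filename wiring above is easiest to see with concrete values. A sketch assuming an output module named RECOoutput and a job whose lfnGroup() bucket is "0000" (both values invented for illustration):

modName = "RECOoutput"                            # outMod._internal_name
lfnBase = "/store/data/SampleEra/SamplePD/RECO"   # hypothetical lfnBase
lfnGroupValue = "0000"                            # what lfnGroup(job) might return

fileName = "%s.root" % modName
lfn = "%s/%s/%s.root" % (lfnBase, lfnGroupValue, modName)
print(fileName)  # RECOoutput.root
print(lfn)       # /store/data/SampleEra/SamplePD/RECO/0000/RECOoutput.root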
Example #21
def makeTaskTweak(stepSection):
    """
    _makeTaskTweak_

    Create a tweak for options in the task that apply to all jobs.
    """
    result = PSetTweak()

    # GlobalTag
    if hasattr(stepSection, "application"):
        if hasattr(stepSection.application, "configuration"):
            if hasattr(stepSection.application.configuration, "pickledarguments"):
                args = pickle.loads(stepSection.application.configuration.pickledarguments)
                if 'globalTag' in args:
                    result.addParameter("process.GlobalTag.globaltag", args['globalTag'])
                if 'globalTagTransaction' in args:
                    result.addParameter("process.GlobalTag.DBParameters.transactionId", args['globalTagTransaction'])

    return result
Example #22
def makeOutputTweak(outMod, job):
    """
    _makeOutputTweak_

    Make a PSetTweak for the output module and job instance provided

    """
    result = PSetTweak()
    # output filenames
    modName = str(getattr(outMod, "_internal_name"))
    fileName = "%s.root" % modName

    result.addParameter("process.%s.fileName" % modName, fileName)

    lfnBase = getattr(outMod, "lfnBase", None)
    if lfnBase is not None:
        lfn = "%s/%s/%s.root" % (lfnBase, lfnGroup(job), modName)
        result.addParameter("process.%s.logicalFileName" % modName, lfn)

    #TODO: Nice standard way to meddle with the other parameters in the
    #      output module based on the settings in the section

    return result
Example #23
def makeJobTweak(job):
    """
    _makeJobTweak_

    Convert information from a WMBS Job object into a PSetTweak
    that can be used to modify a CMSSW process.
    """
    result = PSetTweak()
    baggage = job.getBaggage()

    # Check in the baggage if we are processing .lhe files
    lheInput = getattr(baggage, "lheInputFiles", False)

    # Input files and secondary input files.
    primaryFiles = []
    secondaryFiles = []
    for inputFile in job["input_files"]:
        if inputFile["lfn"].startswith("MCFakeFile"):
            # If there is a preset lumi in the mask, use it as the first
            # luminosity setting
            if job['mask'].get('FirstLumi', None) is not None:
                result.addParameter("process.source.firstLuminosityBlock",
                                    job['mask']['FirstLumi'])
            else:
                # We don't have lumi information in the mask, raise an exception
                raise WMTweakMaskError(job['mask'],
                                       "No first lumi information provided")
            continue

        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])

    if len(primaryFiles) > 0:
        result.addParameter("process.source.fileNames", primaryFiles)
        if len(secondaryFiles) > 0:
            result.addParameter("process.source.secondaryFileNames", secondaryFiles)
    elif not lheInput:
        # The first event parameter should be set from whatever the mask says;
        # that has the added protection of not going over 2^32 - 1.
        # If there is no first event information in the mask, raise an error.
        if job['mask'].get('FirstEvent', None) is not None:
            result.addParameter("process.source.firstEvent",
                                job['mask']['FirstEvent'])
        else:
            # No first event information in the mask, raise an error
            raise WMTweakMaskError(job['mask'],
                                   "No first event information provided in the mask")

    mask = job['mask']

    # event limits
    maxEvents = mask.getMaxEvents()
    if maxEvents is None:
        maxEvents = -1
    result.addParameter("process.maxEvents.input", maxEvents)

    # We don't want to set skip events for MonteCarlo jobs which have
    # no input files.
    firstEvent = mask['FirstEvent']
    if firstEvent != None and firstEvent >= 0 and (len(primaryFiles) > 0 or lheInput):
        if lheInput:
            result.addParameter("process.source.skipEvents", firstEvent - 1)
        else:
            result.addParameter("process.source.skipEvents", firstEvent)

    firstRun = mask['FirstRun']
    if firstRun is not None:
        result.addParameter("process.source.firstRun", firstRun)
    elif not primaryFiles:
        # Then we have an MC job; we need to set firstRun to 1
        logging.debug("MCFakeFile initiated without job FirstRun - using one.")
        result.addParameter("process.source.firstRun", 1)

    runs = mask.getRunAndLumis()
    lumisToProcess = []
    for run, lumiPairs in runs.items():
        for lumiPair in lumiPairs:
            if len(lumiPair) != 2:
                # skip malformed run/lumi entries
                continue
            lumisToProcess.append("%s:%s-%s:%s" % (run, lumiPair[0], run, lumiPair[1]))

    if len(lumisToProcess) > 0:
        result.addParameter("process.source.lumisToProcess", lumisToProcess)

    # install any settings from the per job baggage
    procSection = getattr(baggage, "process", None)
    if procSection is None:
        return result

    baggageParams = decomposeConfigSection(procSection)
    for k, v in baggageParams.items():
        result.addParameter(k, v)

    return result
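The run/lumi handling near the end of makeJobTweak produces CMSSW-style "run:lumi-run:lumi" range strings. A self-contained sketch with a sample payload shaped like Mask.getRunAndLumis() output (run number and lumi ranges are made up):

runs = {201522: [[81, 85], [90, 92]]}  # sample run -> [[firstLumi, lastLumi], ...]
lumisToProcess = []
for run, lumiPairs in runs.items():
    for lumiPair in lumiPairs:
        if len(lumiPair) != 2:
            continue  # skip malformed entries
        lumisToProcess.append("%s:%s-%s:%s" % (run, lumiPair[0], run, lumiPair[1]))
print(lumisToProcess)
# ['201522:81-201522:85', '201522:90-201522:92']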
Example #24
class SetupCMSSWPset(ScriptInterface):
    """
    _SetupCMSSWPset_

    """
    def __init__(self, crabPSet=False):
        ScriptInterface.__init__(self)
        self.crabPSet = crabPSet
        self.process = None
        self.jobBag = None
        self.logger = logging.getLogger()
        self.tweak = PSetTweak()
        self.scram = None
        self.configPickle = "Pset.pkl"
        self.psetFile = None

    def createScramEnv(self):
        scramArchitecture = self.getScramVersion()
        cmsswVersion = self.getCmsswVersion()
        self.logger.info(
            "Creating Scram environment with scram arch: %s and CMSSW version: %s",
            scramArchitecture, cmsswVersion)

        scram = Scram(
            version=cmsswVersion,
            directory=self.stepSpace.location,
            architecture=scramArchitecture,
            initialise=self.step.data.application.setup.softwareEnvironment)
        scram.project()  # creates project area
        scram.runtime()  # creates runtime environment

        return scram

    def scramRun(self, cmdArgs):
        """
        _scramRun_

        Run command inside scram environment

        """
        self.logger.info("ScramRun command args: %s", cmdArgs)
        if self.scram:
            retval = self.scram(command=cmdArgs)
            if retval > 0:
                msg = "Error running scram process. Error code: %s" % (retval)
                self.logger.error(msg)
                raise RuntimeError(msg)
        else:
            raise RuntimeError("Scram is not defined")

    def createProcess(self, scenario, funcName, funcArgs):
        """
        _createProcess_

        Create a Configuration.DataProcessing PSet.

        """

        procScript = "cmssw_wm_create_process.py"

        processDic = {"scenario": scenario}
        processJson = os.path.join(self.stepSpace.location,
                                   "process_scenario.json")
        funcArgsJson = os.path.join(self.stepSpace.location,
                                    "process_funcArgs.json")

        if funcName == "merge" or funcName == "repack":
            try:
                with open(funcArgsJson, 'w') as f:
                    json.dump(funcArgs, f)
            except Exception as ex:
                self.logger.exception(
                    "Error writing out process funcArgs json")
                raise ex
            funcArgsParam = funcArgsJson
        else:
            try:
                with open(processJson, 'w') as f:
                    json.dump(processDic, f)
            except Exception as ex:
                self.logger.exception(
                    "Error writing out process scenario json")
                raise ex
            funcArgsParam = processJson

        cmd = "%s --output_pkl %s --funcname %s --funcargs %s" % (
            procScript, os.path.join(
                self.stepSpace.location,
                self.configPickle), funcName, funcArgsParam)

        if funcName == "merge":
            if getattr(self.jobBag, "useErrorDataset", False):
                cmd += " --useErrorDataset"

        self.scramRun(cmd)
        return

    def loadPSet(self):
        """
        _loadPSet_

        Load a PSet that was shipped with the job sandbox.
        Mock actual Pset values that depend on CMSSW, as these are
        handled externally.

        """
        self.logger.info("Working dir: %s", os.getcwd())
        # Pickle original pset configuration
        procScript = "edm_pset_pickler.py"
        cmd = "%s --input %s --output_pkl %s" % (
            procScript, os.path.join(self.stepSpace.location, self.psetFile),
            os.path.join(self.stepSpace.location, self.configPickle))
        self.scramRun(cmd)

        try:
            with open(os.path.join(self.stepSpace.location, self.configPickle),
                      'rb') as f:
                self.process = Unpickler(f).load()
        except ImportError as ex:
            msg = "Unable to import pset from %s:\n" % self.psetFile
            msg += str(ex)
            self.logger.error(msg)
            raise ex

        return

    def fixupProcess(self):
        """
        _fixupProcess_
        Look over the process object and make sure that all of the attributes
        that we expect to exist actually exist.
        """
        # Make sure that for each output module the following parameters exist
        # in the PSet returned from the framework:
        #   fileName
        #   logicalFileName
        #   dataset.dataTier
        #   dataset.filterName
        if hasattr(self.process, "outputModules"):
            outputModuleNames = list(self.process.outputModules)
        elif hasattr(self.process, "outputModules_"):
            outputModuleNames = self.process.outputModules_()
        elif hasattr(self.process, "_Process__outputmodules"):
            outputModuleNames = list(self.process._Process__outputmodules)
        else:
            msg = "Error loading output modules from process"
            raise AttributeError(msg)

        for outMod in outputModuleNames:
            tweak = PSetTweak()
            self.logger.info("DEBUG output module = %s", outMod)
            tweak.addParameter("process.options",
                               "customTypeCms.untracked.PSet()")
            tweak.addParameter(
                "process.%s.dataset" % outMod,
                "customTypeCms.untracked.PSet(dataTier=cms.untracked.string(''), filterName=cms.untracked.string(''))"
            )
            self.applyPsetTweak(tweak, skipIfSet=True, cleanupTweak=True)
            #tweak.addParameter("process.%s.dataset.dataTier" % outMod, "customTypeCms.untracked.string('')")
            #tweak.addParameter("process.%s.dataset.filterName" % outMod, "customTypeCms.untracked.string('')")
            tweak.addParameter("process.%s.fileName" % outMod,
                               "customTypeCms.untracked.string('')")
            tweak.addParameter("process.%s.logicalFileName" % outMod,
                               "customTypeCms.untracked.string('')")
            self.applyPsetTweak(tweak, skipIfSet=True)

        return

    def applyPsetTweak(self,
                       psetTweak,
                       skipIfSet=False,
                       allowFailedTweaks=False,
                       name='',
                       cleanupTweak=False):
        procScript = "edm_pset_tweak.py"
        psetTweakJson = os.path.join(self.stepSpace.location,
                                     "PSetTweak%s.json" % name)
        psetTweak.persist(psetTweakJson, formatting='simplejson')

        cmd = "%s --input_pkl %s --output_pkl %s --json %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location,
                         self.configPickle), psetTweakJson)
        if skipIfSet:
            cmd += " --skip_if_set"
        if allowFailedTweaks:
            cmd += " --allow_failed_tweaks"
        self.scramRun(cmd)

        if cleanupTweak:
            # start a fresh tweak so subsequent calls begin from a clean slate
            self.tweak = PSetTweak()

        return

    def handleSeeding(self):
        """
        _handleSeeding_

        Handle Random Seed settings for the job
        """
        seeding = getattr(self.jobBag, "seeding", None)
        seedJson = os.path.join(self.stepSpace.location,
                                "reproducible_seed.json")
        self.logger.info("Job seeding set to: %s", seeding)
        procScript = "cmssw_handle_random_seeds.py"

        cmd = "%s --input_pkl %s --output_pkl %s --seeding %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location, self.configPickle), seeding)

        if seeding == "ReproducibleSeeding":
            randService = self.jobBag.process.RandomNumberGeneratorService
            seedParams = {}
            for x in randService:
                parameter = "process.RandomNumberGeneratorService.%s.initialSeed" % x._internal_name
                seedParams[parameter] = x.initialSeed
            try:
                with open(seedJson, 'w') as f:
                    json.dump(seedParams, f)
            except Exception as ex:
                self.logger.exception(
                    "Error writing out process funcArgs json:")
                raise ex
            cmd += " --reproducible_json %s" % (seedJson)

        self.scramRun(cmd)
        return

    def handlePerformanceSettings(self):
        """
        _handlePerformanceSettings_

        Install the standard performance report services
        """
        tweak = PSetTweak()
        # include the default performance report services
        if getattr(self.step.data.application.command, 'silentMemoryCheck',
                   False):
            tweak.addParameter(
                "process.SimpleMemoryCheck",
                "customTypeCms.Service('SimpleMemoryCheck', jobReportOutputOnly=cms.untracked.bool(True))"
            )
        else:
            tweak.addParameter("process.SimpleMemoryCheck",
                               "customTypeCms.Service('SimpleMemoryCheck')")

        tweak.addParameter("process.CPU", "customTypeCms.Service('CPU')")
        tweak.addParameter("process.Timing", "customTypeCms.Service('Timing')")
        self.applyPsetTweak(tweak)
        self.tweak.addParameter("process.Timing.summaryOnly",
                                "customTypeCms.untracked(cms.bool(True))")

        return

    def makeThreadsStreamsTweak(self):
        """
        _makeThreadsStreamsTweak_

        Tweak threads and streams parameters
        """
        origCores = int(
            getattr(self.step.data.application.multicore, 'numberOfCores', 1))
        eventStreams = int(
            getattr(self.step.data.application.multicore, 'eventStreams', 0))
        resources = {'cores': origCores}
        resizeResources(resources)
        numCores = resources['cores']
        if numCores != origCores:
            self.logger.info(
                "Resizing a job with nStreams != nCores. Setting nStreams = nCores. This may end badly."
            )
            eventStreams = 0

        tweak = PSetTweak()
        tweak.addParameter("process.options", "customTypeCms.untracked.PSet()")
        self.applyPsetTweak(tweak, skipIfSet=True)
        self.tweak.addParameter(
            "process.options.numberOfThreads",
            "customTypeCms.untracked.uint32(%s)" % numCores)
        self.tweak.addParameter(
            "process.options.numberOfStreams",
            "customTypeCms.untracked.uint32(%s)" % eventStreams)

        return

    def handleChainedProcessingTweak(self):
        """
        _handleChainedProcessing_

        In order to handle chained processing it's necessary to feed
        output of one step/task (nomenclature ambiguous) to another.
        This method creates particular mapping in a working Trivial
        File Catalog (TFC).
        """
        self.logger.info("Handling chained processing job")
        # first, create an instance of TrivialFileCatalog to override
        tfc = TrivialFileCatalog()
        # check the jobs input files
        inputFile = ("../%s/%s.root" %
                     (self.step.data.input.inputStepName,
                      self.step.data.input.inputOutputModule))
        tfc.addMapping("direct",
                       inputFile,
                       inputFile,
                       mapping_type="lfn-to-pfn")
        tfc.addMapping("direct",
                       inputFile,
                       inputFile,
                       mapping_type="pfn-to-lfn")

        self.tweak.addParameter(
            'process.source.fileNames',
            "customTypeCms.untracked.vstring(%s)" % [inputFile])
        self.tweak.addParameter(
            "process.maxEvents",
            "customTypeCms.untracked.PSet(input=cms.untracked.int32(-1))")

        tfcName = "override_catalog.xml"
        tfcPath = os.path.join(os.getcwd(), tfcName)
        self.logger.info("Creating override TFC and saving into '%s'", tfcPath)
        tfcStr = tfc.getXML()
        with open(tfcPath, 'w') as tfcFile:
            tfcFile.write(tfcStr)

        self.step.data.application.overrideCatalog = "trivialcatalog_file:" + tfcPath + "?protocol=direct"

        return

    def handlePileup(self):
        """
        _handlePileup_

        Handle pileup settings.
        The pileup configuration was stored in a JSON file, produced by
        DBS queries when PileupFetcher ran. This method loads that
        configuration from the sandbox and applies it to the PSet.
        PileupFetcher was invoked by WorkQueue, which creates the job's
        sandbox; the sandbox then gets shipped to the worker node.

        External script iterates over all modules and over all pileup configuration types.
        The only considered types are "data" and "mc" (input to this method).
        If other pileup types are specified by the user, the method doesn't
        modify anything.
        The method considers only files which are present on this local PNN.
        The job will use only those, unless it was told to trust the PU site
        location (trustPUSitelists=True), in this case ALL the blocks/files
        will be added to the PSet and files will be read via AAA.
        A dataset, divided into blocks, may not have all of its blocks present
        on a particular PNN. However, all files belonging to a block will be
        present when reported by DBS.
        The structure of the pileupDict: PileupFetcher._queryDbsAndGetPileupConfig

        """
        # find out local site SE name
        siteConfig = loadSiteLocalConfig()
        PhEDExNodeName = siteConfig.localStageOut["phedex-node"]
        self.logger.info("Running on site '%s', local PNN: '%s'",
                         siteConfig.siteName, PhEDExNodeName)
        jsonPileupConfig = os.path.join(self.stepSpace.location,
                                        "pileupconf.json")

        # Load pileup json
        try:
            with open(jsonPileupConfig) as jdata:
                pileupDict = json.load(jdata)
        except IOError:
            m = "Could not read pileup JSON configuration file: '%s'" % jsonPileupConfig
            raise RuntimeError(m)

        # Create a json with a list of files and events available
        # after dealing with PhEDEx/AAA logic
        newPileupDict = {}
        fileList = []
        eventsAvailable = 0
        for pileupType in self.step.data.pileup.listSections_():
            useAAA = bool(getattr(self.jobBag, 'trustPUSitelists', False))
            self.logger.info("Pileup set to read data remotely: %s", useAAA)
            for blockName in sorted(pileupDict[pileupType].keys()):
                blockDict = pileupDict[pileupType][blockName]
                if PhEDExNodeName in blockDict["PhEDExNodeNames"] or useAAA:
                    eventsAvailable += int(blockDict.get('NumberOfEvents', 0))
                    for fileLFN in blockDict["FileList"]:
                        fileList.append(str(fileLFN))
            newPileupDict[pileupType] = {
                "eventsAvailable": eventsAvailable,
                "FileList": fileList
            }
        newJsonPileupConfig = os.path.join(self.stepSpace.location,
                                           "CMSSWPileupConfig.json")
        self.logger.info("Generating json for CMSSW pileup script")
        try:
            with open(newJsonPileupConfig, 'w') as f:
                json.dump(newPileupDict, f)
        except Exception as ex:
            self.logger.exception("Error writing out process filelist json:")
            raise ex

        procScript = "cmssw_handle_pileup.py"
        cmd = "%s --input_pkl %s --output_pkl %s --pileup_dict %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location,
                         self.configPickle), newJsonPileupConfig)

        if getattr(self.jobBag, "skipPileupEvents", None):
            randomSeed = self.job['task']
            skipPileupEvents = self.jobBag.skipPileupEvents
            cmd += " --skip_pileup_events %s --random_seed %s" % (
                skipPileupEvents, randomSeed)
        self.scramRun(cmd)

        return

    def handleProducersNumberOfEvents(self):
        """
        _handleProducersNumberOfEvents_

        Some producer modules are initialized with a maximum number of events
        to be generated, usually based on the process.maxEvents.input attribute
        but after that is tweaked the producers number of events need to
        be fixed as well. This method takes care of that.
        """

        procScript = "cmssw_handle_nEvents.py"
        cmd = "%s --input_pkl %s --output_pkl %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location, self.configPickle))
        self.scramRun(cmd)

        return

    def handleDQMFileSaver(self):
        """
        _handleDQMFileSaver_

        Harvesting jobs have the dqmFileSaver EDAnalyzer that must
        be tweaked with the dataset name in order to store it
        properly in the DQMGUI; other tweaks can be added as well
        """

        runIsComplete = getattr(self.jobBag, "runIsComplete", False)
        multiRun = getattr(self.jobBag, "multiRun", False)
        runLimits = getattr(self.jobBag, "runLimits", "")
        self.logger.info(
            "DQMFileSaver set to multiRun: %s, runIsComplete: %s, runLimits: %s",
            multiRun, runIsComplete, runLimits)

        procScript = "cmssw_handle_dqm_filesaver.py"

        cmd = "%s --input_pkl %s --output_pkl %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location, self.configPickle))

        datasetName = None
        if hasattr(self.step.data.application.configuration,
                   "pickledarguments"):
            args = pickle.loads(
                self.step.data.application.configuration.pickledarguments)
            datasetName = args.get('datasetName', None)
        if datasetName:
            cmd += " --datasetName %s" % (datasetName)
        if multiRun and runLimits:
            cmd += " --multiRun --runLimits=%s" % (runLimits)
        if runIsComplete:
            cmd += " --runIsComplete"
        self.scramRun(cmd)

        return

    def handleLHEInput(self):
        """
        _handleLHEInput_

        Enable lazy-download for jobs reading LHE input files from CERN,
        such that these jobs can read data remotely
        """

        if getattr(self.jobBag, "lheInputFiles", False):
            self.logger.info("Enabling 'lazy-download' for lheInputFiles job")
            self._enableLazyDownload()

        return

    def handleRepackSettings(self):
        """
        _handleRepackSettings_

        Repacking small events is very inefficient when reading directly from EOS.
        """
        self.logger.info("Hardcoding read/cache strategies for repack")
        self._enableLazyDownload()
        return

    def _enableLazyDownload(self):
        """
        _enableLazyDownload_

        Set things to read data remotely
        """
        procScript = "cmssw_enable_lazy_download.py"
        cmd = "%s --input_pkl %s --output_pkl %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location, self.configPickle))
        self.scramRun(cmd)

        return

    def handleSingleCoreOverride(self):
        """
        _handleSingleCoreOverride_

        Make sure job only uses one core and one stream in CMSSW
        """
        try:
            if int(self.step.data.application.multicore.numberOfCores) > 1:
                self.step.data.application.multicore.numberOfCores = 1
        except AttributeError:
            pass

        try:
            if int(self.step.data.application.multicore.eventStreams) > 0:
                self.step.data.application.multicore.eventStreams = 0
        except AttributeError:
            pass

        return

    def handleSpecialCERNMergeSettings(self, funcName):
        """
        _handleSpecialCERNMergeSettings_

        CERN has a 30ms latency between Meyrin and Wigner, which kills merge performance
        Enable lazy-download for fastCloning for all CMSSW_7_5 jobs (currently off)
        Enable lazy-download for all merge jobs
        """
        if self.getCmsswVersion().startswith("CMSSW_7_5") and False:
            self.logger.info("Using fastCloning/lazydownload")
            self._enableLazyDownload()
        elif funcName == "merge":
            self.logger.info("Using lazydownload")
            self._enableLazyDownload()

        return

    def handleCondorStatusService(self):
        """
        _handleCondorStatusService_

        Enable CondorStatusService for CMSSW releases that support it.
        """
        procScript = "cmssw_handle_condor_status_service.py"
        cmd = "%s --input_pkl %s --output_pkl %s --name %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location,
                         self.configPickle), self.step.data._internal_name)
        self.scramRun(cmd)

        return

    def handleEnforceGUIDInFileName(self, secondaryInput=None):
        """
        _handleEnforceGUIDInFileName_

        Enable enforceGUIDInFileName for CMSSW releases that support it.
        """
        # skip it for CRAB jobs
        if self.crabPSet:
            return

        if secondaryInput:
            inputSource = secondaryInput
            self.logger.info(
                "Evaluating enforceGUIDInFileName parameter for secondary input data."
            )
        else:
            inputSource = self.process.source

        if hasattr(inputSource, "type_"):
            inputSourceType = inputSource.type_()
        elif hasattr(inputSource, "_TypedParameterizable__type"):
            inputSourceType = inputSource._TypedParameterizable__type
        else:
            msg = "Source type could not be determined."
            self.logger.error(msg)
            raise AttributeError(msg)

        # only enable if source is PoolSource or EmbeddedRootSource
        if inputSourceType not in ["PoolSource", "EmbeddedRootSource"]:
            self.logger.info(
                "Not evaluating enforceGUIDInFileName parameter for process source %s",
                inputSourceType)
            return

        procScript = "cmssw_enforce_guid_in_filename.py"
        cmd = "%s --input_pkl %s --output_pkl %s --input_source %s" % (
            procScript, os.path.join(self.stepSpace.location,
                                     self.configPickle),
            os.path.join(self.stepSpace.location,
                         self.configPickle), inputSourceType)
        self.scramRun(cmd)

        return

    def getCmsswVersion(self):
        """
        _getCmsswVersion_

        Return a string representing the CMSSW version to be used.
        """
        if not self.crabPSet:
            return self.step.data.application.setup.cmsswVersion

        # CRAB3 needs to use an environment var to get the version
        return os.environ.get("CMSSW_VERSION", "")

    def getScramVersion(self, allSteps=False):
        """
        _getScramVersion_

        Return a string representing the first Scram version to be used (or all)
        """
        if not self.crabPSet:
            scramArch = self.step.data.application.setup.scramArch
            if allSteps:
                return scramArch
            if isinstance(scramArch, list):
                return next(iter(scramArch or []), None)
            return scramArch

        # CRAB3 needs to use an environment var to get the version
        return os.environ.get("SCRAM_ARCH", "")

    def __call__(self):
        """
        _call_

        Examine the step configuration and construct a PSet from that.

        """
        self.logger.info("Executing SetupCMSSWPSet...")
        self.jobBag = self.job.getBaggage()
        self.configPickle = getattr(self.step.data.application.command,
                                    "configurationPickle", "PSet.pkl")
        self.psetFile = getattr(self.step.data.application.command,
                                "configuration", "PSet.py")
        self.scram = self.createScramEnv()

        scenario = getattr(self.step.data.application.configuration,
                           "scenario", None)
        if scenario is not None and scenario != "":
            self.logger.info("DEBUG: I'm in scenario")
            self.logger.info("Setting up job scenario/process")
            funcName = getattr(self.step.data.application.configuration,
                               "function", None)
            if getattr(self.step.data.application.configuration,
                       "pickledarguments", None) is not None:
                funcArgs = pickle.loads(
                    self.step.data.application.configuration.pickledarguments)
            else:
                funcArgs = {}

            # Create process
            try:
                self.createProcess(scenario, funcName, funcArgs)
            except Exception as ex:
                self.logger.exception(
                    "Error creating process for Config/DataProcessing:")
                raise ex
            # Now, load the new picked process
            try:
                with open(
                        os.path.join(self.stepSpace.location,
                                     self.configPickle), 'rb') as f:
                    self.process = Unpickler(f).load()
            except ImportError as ex:
                msg = "Unable to import pset from %s:\n" % self.psetFile
                msg += str(ex)
                self.logger.error(msg)
                raise ex

            if funcName == "repack":
                self.handleRepackSettings()

            if funcName in ["merge", "alcaHarvesting"]:
                self.handleSingleCoreOverride()

            if socket.getfqdn().endswith("cern.ch"):
                self.handleSpecialCERNMergeSettings(funcName)
        else:
            self.logger.info("DEBUG: Now in the none scenario to load PSET")
            try:
                self.loadPSet()
            except Exception as ex:
                self.logger.exception("Error loading PSet:")
                raise ex

        # Check process.source exists
        self.logger.info("Debug: Self.process")
        self.logger.info(dir(self.process))
        if getattr(self.process, "source", None) is None and getattr(
                self.process, "_Process__source", None) is None:
            msg = "Error in CMSSW PSet: process is missing attribute 'source'"
            msg += " or process.source is defined with None value."
            self.logger.error(msg)
            raise RuntimeError(msg)

        self.handleCondorStatusService()
        self.fixupProcess()

        # In case of CRAB3, the number of threads in the PSet should not be overridden
        if not self.crabPSet:
            try:
                self.makeThreadsStreamsTweak()
            except AttributeError as ex:
                self.logger.error("Failed to override numberOfThreads: %s",
                                  str(ex))

        # Apply task level tweaks
        makeTaskTweak(self.step.data, self.tweak)
        self.applyPsetTweak(self.tweak, cleanupTweak=True)

        # Check if chained processing is enabled
        # If not - apply the per job tweaks
        # If so - create an override TFC (like done in PA) and then modify thePSet accordingly
        if hasattr(self.step.data.input, "chainedProcessing"
                   ) and self.step.data.input.chainedProcessing:
            self.logger.info("Handling Chain processing tweaks")
            self.handleChainedProcessingTweak()
        else:
            self.logger.info("Creating job level tweaks")
            makeJobTweak(self.job, self.tweak)
        self.applyPsetTweak(self.tweak, cleanupTweak=True)

        # check for pileup settings presence, pileup support implementation
        # and if enabled, process pileup configuration / settings
        if hasattr(self.step.data, "pileup"):
            self.handlePileup()

        # Apply per output module PSet Tweaks
        self.logger.info("Output module section")
        cmsswStep = self.step.getTypeHelper()
        for om in cmsswStep.listOutputModules():
            mod = cmsswStep.getOutputModule(om)
            makeOutputTweak(mod, self.job, self.tweak)
        self.applyPsetTweak(self.tweak, cleanupTweak=True)

        # revlimiter for testing
        if getattr(self.step.data.application.command, "oneEventMode", False):
            self.tweak.addParameter('process.maxEvents.input',
                                    "customTypeCms.untracked.int32(1)")

        # check for random seeds and the method of seeding which is in the job baggage
        self.handleSeeding()

        # make sure default parametersets for perf reports are installed
        self.handlePerformanceSettings()

        # fixup the dqmFileSaver
        self.handleDQMFileSaver()

        # tweak for jobs reading LHE input files from CERN
        self.handleLHEInput()

        # tweak jobs for enforceGUIDInFileName
        self.handleEnforceGUIDInFileName()

        # Check if we accept skipping bad files
        if hasattr(self.step.data.application.configuration, "skipBadFiles"):
            self.tweak.addParameter(
                "process.source.skipBadFiles",
                "customTypeCms.untracked.bool(%s)" %
                self.step.data.application.configuration.skipBadFiles)

        # Apply events per lumi section if available
        if hasattr(self.step.data.application.configuration, "eventsPerLumi"):
            self.tweak.addParameter(
                "process.source.numberEventsInLuminosityBlock",
                "customTypeCms.untracked.uint32(%s)" %
                self.step.data.application.configuration.eventsPerLumi)

        # limit run time if desired
        if hasattr(self.step.data.application.configuration,
                   "maxSecondsUntilRampdown"):
            self.tweak.addParameter(
                "process.maxSecondsUntilRampdown.input",
                "customTypeCms.untracked.PSet(input=cms.untracked.int32(%s)" %
                self.step.data.application.configuration.
                maxSecondsUntilRampdown)

        # accept an overridden TFC from the step
        if hasattr(self.step.data.application, 'overrideCatalog'):
            self.logger.info("Found a TFC override: %s",
                             self.step.data.application.overrideCatalog)
            self.tweak.addParameter(
                "process.source.overrideCatalog",
                "customTypeCms.untracked.string('%s')" %
                self.step.data.application.overrideCatalog)

        configFile = self.step.data.application.command.configuration
        workingDir = self.stepSpace.location
        try:
            self.applyPsetTweak(self.tweak)

            with open("%s/%s" % (workingDir, configFile), 'w') as handle:
                handle.write("import FWCore.ParameterSet.Config as cms\n")
                handle.write("import pickle\n")
                handle.write("with open('%s', 'rb') as handle:\n" %
                             self.configPickle)
                handle.write("    process = pickle.load(handle)\n")
        except Exception as ex:
            self.logger.exception("Error writing out PSet:")
            raise ex

        # check for event numbers in the producers
        self.handleProducersNumberOfEvents()

        self.logger.info("CMSSW PSet setup completed!")

        return 0
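
For reference, the bootstrap file written out in the try block above is deliberately tiny: cmsRun loads it, and it does nothing but unpickle the fully tweaked process object. A minimal sketch of what the generated file looks like, assuming the illustrative names 'PSet.py' for configFile and 'PSet.pkl' for self.configPickle:

# PSet.py -- generated bootstrap; loads the tweaked process from its pickle
import FWCore.ParameterSet.Config as cms
import pickle
with open('PSet.pkl', 'rb') as handle:  # 'PSet.pkl' stands in for self.configPickle
    process = pickle.load(handle)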
Example #25
0
def makeJobTweak(job):
    """
    _makeJobTweak_

    Convert information from a WMBS Job object into a PSetTweak
    that can be used to modify a CMSSW process.
    """
    result = PSetTweak()

    # Input files and secondary input files.
    primaryFiles = []
    secondaryFiles = []
    for inputFile in job["input_files"]:
        if inputFile["lfn"].startswith("MCFakeFile"):
            # If there is a preset lumi in the mask, use it as the first
            # luminosity setting
            if getattr(job['mask'], 'FirstLumi', None) is not None:
                result.addParameter("process.source.firstLuminosityBlock",
                                    job['mask']['FirstLumi'])
            else:
                # Then we don't have a FirstLumi.
                # Set the lumi block from the job counter so that each job in
                # the workflow gets a distinct luminosity block.
                logging.debug("MCFakeFile initiated without job FirstLumi - using counter.")
                result.addParameter("process.source.firstLuminosityBlock",
                                    int(job["counter"]))

            # Assign the run
            if getattr(job['mask'], 'FirstRun', None) is not None:
                result.addParameter("process.source.firstRun",
                                    job['mask']['FirstRun'])
            else:
                # No FirstRun in the mask; default the run number to one.
                logging.debug("MCFakeFile initiated without job FirstRun - using one.")
                result.addParameter("process.source.firstRun", 1)

            continue
        
        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])

    if len(primaryFiles) > 0:
        result.addParameter("process.source.fileNames", primaryFiles)
        if len(secondaryFiles) > 0:
            result.addParameter("process.source.secondaryFileNames", secondaryFiles)    
    else:
        # We need to set the first event parameter for MC jobs but do not want
        # to set it for regular processing jobs.  MC jobs don't have input
        # files, so we'll set it here.
        baggage = job.getBaggage()
        if hasattr(baggage, "eventsPerJob"):
            # Limit the event number to a 32bit unsigned int.
            counter = ((int(baggage.eventsPerJob) * (int(job["counter"]) - 1)) + 1) % (2**32 - 1)

            # Catch cases where the counter will roll over during the job.
            if (counter + int(baggage.eventsPerJob)) > (2**32 - 1):
                counter = 1
            elif counter < int(baggage.eventsPerJob):
                counter = 1
            
            result.addParameter("process.source.firstEvent", counter)

    mask = job['mask']

    # event limits
    maxEvents = mask.getMaxEvents()
    if maxEvents is None:
        maxEvents = -1
    result.addParameter("process.maxEvents.input", maxEvents)

    # We don't want to set skip events for MonteCarlo jobs which have
    # no input files.
    firstEvent = mask['FirstEvent']
    if firstEvent is not None and firstEvent >= 0 and len(primaryFiles) > 0:
        result.addParameter("process.source.skipEvents", firstEvent)

    firstRun = mask['FirstRun']
    if firstRun is not None:
        result.addParameter("process.source.firstRun", firstRun)

    runs = mask.getRunAndLumis()
    lumisToProcess = []
    for run in runs.keys():
        lumiPairs = runs[run]
        for lumiPair in lumiPairs:
            if len(lumiPair) != 2:
                # skip malformed lumi ranges
                continue
            lumisToProcess.append("%s:%s-%s:%s" % (run, lumiPair[0], run, lumiPair[1]))

    if len(lumisToProcess) > 0:
        result.addParameter("process.source.lumisToProcess", lumisToProcess)
                        
    # install any settings from the per job baggage
    baggage = job.getBaggage()

    procSection = getattr(baggage, "process", None)
    if procSection is None:
        return result

    baggageParams = decomposeConfigSection(procSection)
    for k, v in baggageParams.items():
        result.addParameter(k, v)

    return result
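
The MC branch above packs the per-job first-event bookkeeping into a few lines; restated standalone, the rollover guard is easier to see. A sketch under illustrative names (mcFirstEvent is not a WMCore function):

def mcFirstEvent(eventsPerJob, jobCounter):
    # job N starts at event eventsPerJob*(N-1) + 1, kept inside the 32-bit range
    counter = (eventsPerJob * (jobCounter - 1) + 1) % (2**32 - 1)
    # restart at 1 if the job's range would cross 2**32 - 1 (or already wrapped)
    if (counter + eventsPerJob) > (2**32 - 1) or counter < eventsPerJob:
        counter = 1
    return counter

assert mcFirstEvent(1000, 2) == 1001   # job 2 starts right after job 1's events
assert mcFirstEvent(2**31, 3) == 1     # range would cross 2**32 - 1, so reset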
Example #26
0
def makeJobTweak(job):
    """
    _makeJobTweak_

    Convert information from a WMBS Job object into a PSetTweak
    that can be used to modify a CMSSW process.
    """
    result = PSetTweak()
    baggage = job.getBaggage()

    # Check in the baggage if we are processing .lhe files
    lheInput = getattr(baggage, "lheInputFiles", False)

    # Input files and secondary input files.
    primaryFiles = []
    secondaryFiles = []
    for inputFile in job["input_files"]:
        if inputFile["lfn"].startswith("MCFakeFile"):
            # If there is a preset lumi in the mask, use it as the first
            # luminosity setting
            if job['mask'].get('FirstLumi', None) is not None:
                result.addParameter("process.source.firstLuminosityBlock",
                                    job['mask']['FirstLumi'])
            else:
                # We don't have lumi information in the mask; raise an exception
                raise WMTweakMaskError(job['mask'],
                                       "No first lumi information provided")
            continue

        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])

    if len(primaryFiles) > 0:
        result.addParameter("process.source.fileNames", primaryFiles)
        if len(secondaryFiles) > 0:
            result.addParameter("process.source.secondaryFileNames",
                                secondaryFiles)
    elif not lheInput:
        # The first event parameter should be set from whatever the mask says;
        # that has the added protection of not going over 2^32 - 1.
        # If there is nothing in the mask, raise an error.
        if job['mask'].get('FirstEvent', None) is not None:
            result.addParameter("process.source.firstEvent",
                                job['mask']['FirstEvent'])
        else:
            # No first event information in the mask; raise an error
            raise WMTweakMaskError(
                job['mask'], "No first event information provided in the mask")

    mask = job['mask']

    # event limits
    maxEvents = mask.getMaxEvents()
    if maxEvents is None:
        maxEvents = -1
    result.addParameter("process.maxEvents.input", maxEvents)

    # We don't want to set skip events for MonteCarlo jobs which have
    # no input files.
    firstEvent = mask['FirstEvent']
    if firstEvent is not None and firstEvent >= 0 and (len(primaryFiles) > 0
                                                       or lheInput):
        if lheInput:
            result.addParameter("process.source.skipEvents", firstEvent - 1)
        else:
            result.addParameter("process.source.skipEvents", firstEvent)

    firstRun = mask['FirstRun']
    if firstRun is not None:
        result.addParameter("process.source.firstRun", firstRun)
    elif not primaryFiles:
        # Then we have an MC job; we need to set firstRun to 1
        logging.debug("MCFakeFile initiated without job FirstRun - using one.")
        result.addParameter("process.source.firstRun", 1)

    runs = mask.getRunAndLumis()
    lumisToProcess = []
    for run in runs.keys():
        lumiPairs = runs[run]
        for lumiPair in lumiPairs:
            if len(lumiPair) != 2:
                # skip malformed lumi ranges
                continue
            lumisToProcess.append("%s:%s-%s:%s" %
                                  (run, lumiPair[0], run, lumiPair[1]))

    if len(lumisToProcess) > 0:
        result.addParameter("process.source.lumisToProcess", lumisToProcess)

    # install any settings from the per job baggage
    procSection = getattr(baggage, "process", None)
    if procSection is None:
        return result

    baggageParams = decomposeConfigSection(procSection)
    for k, v in baggageParams.items():
        result.addParameter(k, v)

    return result
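
The run/lumi loop above flattens the mask's {run: [[first, last], ...]} dictionary into the "run:lumi-run:lumi" range strings CMSSW expects for lumisToProcess. A standalone sketch of that conversion (lumiRanges is an illustrative name, not WMCore API):

def lumiRanges(runsAndLumis):
    # {1: [[11, 12], [14, 14]]} -> ['1:11-1:12', '1:14-1:14']
    ranges = []
    for run, lumiPairs in runsAndLumis.items():
        for pair in lumiPairs:
            if len(pair) != 2:
                continue  # skip malformed entries, as the loop above does
            ranges.append("%s:%s-%s:%s" % (run, pair[0], run, pair[1]))
    return ranges

assert lumiRanges({1: [[11, 12], [14, 14]]}) == ['1:11-1:12', '1:14-1:14']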
Example #27
0
def createScriptLines(opts, pklIn):
    """
    prepares a bash script fragment which tweaks the PSet params according to opts
    returns a string containing the script lines separated by '\n'
    """

    runAndLumis = {}
    if opts.runAndLumis:
        runAndLumis = readFileFromTarball(opts.runAndLumis,
                                          'run_and_lumis.tar.gz')
    inputFiles = {}
    if opts.inputFile:
        inputFiles = readFileFromTarball(opts.inputFile, 'input_files.tar.gz')

    # build a tweak object with the needed changes to be applied to PSet
    tweak = PSetTweak()

    # add tweaks

    # inputFile will always be present
    # inputFile can have three formats depending on whether secondary input files are used:
    # 1. a single LFN as a string : "/store/.....root"
    # 2. a list of LFNs : ["/store/.....root", "/store/....root", ...]
    # 3. a list of dictionaries (one per file) with keys: 'lfn' and 'parents'
    #   value for 'lfn' is a string, value for 'parents' is a list of {'lfn':lfn} dictionaries
    #   [{'lfn':inputlfn, 'parents':[{'lfn':parentlfn1},{'lfn':parentlfn2}], ....]},...]
    # to properly prepare the tweak we reuse code from WMTweak.py:
    # https://github.com/dmwm/WMCore/blob/bb573b442a53717057c169b05ae4fae98f31063b/src/python/PSetTweaks/WMTweak.py#L415-L441
    primaryFiles = []
    secondaryFiles = []
    for inputFile in inputFiles:
        # make sure input is always in format 3.
        if not isinstance(inputFile, dict):
            inputFile = {'lfn': inputFile, 'parents': []}
        if inputFile["lfn"].startswith("MCFakeFile"):
            # for MC which uses "EmptySource" there must be no inputFile
            continue
        primaryFiles.append(inputFile["lfn"])
        for secondaryFile in inputFile["parents"]:
            secondaryFiles.append(secondaryFile["lfn"])
    print("Adding %d files to 'fileNames' attr" % len(primaryFiles))
    print("Adding %d files to 'secondaryFileNames' attr" % len(secondaryFiles))
    if len(primaryFiles) > 0:
        tweak.addParameter(
            "process.source.fileNames",
            "customTypeCms.untracked.vstring(%s)" % primaryFiles)
        if len(secondaryFiles) > 0:
            tweak.addParameter(
                "process.source.secondaryFileNames",
                "customTypeCms.untracked.vstring(%s)" % secondaryFiles)

    # for rearranging runsAndLumis into the structure needed by CMSSW, reuse code taken from
    # https://github.com/dmwm/WMCore/blob/bb573b442a53717057c169b05ae4fae98f31063b/src/python/PSetTweaks/WMTweak.py#L482
    if runAndLumis:
        lumisToProcess = []
        for run in runAndLumis.keys():
            lumiPairs = runAndLumis[run]
            for lumiPair in lumiPairs:
                if len(lumiPair) != 2:
                    # skip malformed lumi ranges
                    continue
                lumisToProcess.append("%s:%s-%s:%s" %
                                      (run, lumiPair[0], run, lumiPair[1]))
        tweak.addParameter(
            "process.source.lumisToProcess",
            "customTypeCms.untracked.VLuminosityBlockRange(%s)" %
            lumisToProcess)

    # how many events to process
    if opts.firstEvent:
        tweak.addParameter(
            "process.source.firstEvent",
            "customTypeCms.untracked.uint32(%s)" % opts.firstEvent)
    if opts.firstEvent is None or opts.lastEvent is None:
        # what to process is defined in runAndLumis; we do not split by events here
        maxEvents = -1
    else:
        # for MC, CRAB passes first/last event, but cmsRun wants firstEvent + maxEvents
        maxEvents = int(opts.lastEvent) - int(opts.firstEvent) + 1
        opts.lastEvent = None  # for MC there must be no lastEvent
    tweak.addParameter("process.maxEvents.input",
                       "customTypeCms.untracked.int32(%s)" % maxEvents)

    if opts.lastEvent:
        tweak.addParameter(
            "process.source.lastEvent",
            "customTypeCms.untracked.uint32(%s)" % opts.lastEvent)

    # firstLumi, firstRun and eventsPerLumi are used for MC
    if opts.firstLumi:
        tweak.addParameter(
            "process.source.firstLuminosityBlock",
            "customTypeCms.untracked.uint32(%s)" % opts.firstLumi)
    if opts.firstRun:
        tweak.addParameter(
            "process.source.firstRun",
            "customTypeCms.untracked.uint32(%s)" % opts.firstRun)
    if opts.eventsPerLumi:
        numberEventsInLuminosityBlock = "customTypeCms.untracked.uint32(%s)" % opts.eventsPerLumi
        tweak.addParameter("process.source.numberEventsInLuminosityBlock",
                           numberEventsInLuminosityBlock)

    # time-limited running is used by automatic splitting probe jobs
    if opts.maxRuntime:
        maxSecondsUntilRampdown = "customTypeCms.untracked.int32(%s)" % opts.maxRuntime
        tweak.addParameter("process.maxSecondsUntilRampdown.input",
                           maxSecondsUntilRampdown)

    # event limiter for testing
    if opts.oneEventMode in ["1", "True", True]:
        tweak.addParameter("process.maxEvents.input",
                           "customTypeCms.untracked.int32(1)")

    # make sure that FJR contains useful statistics, reuse code from
    # https://github.com/dmwm/WMCore/blob/c2fa70af3b4c5285d50e6a8bf48636232f738340/src/python/WMCore/WMRuntime/Scripts/SetupCMSSWPset.py#L289-L307
    tweak.addParameter("process.CPU", "customTypeCms.Service('CPU')")
    tweak.addParameter(
        "process.Timing",
        "customTypeCms.Service('Timing', summaryOnly=cms.untracked.bool(True))"
    )
    tweak.addParameter(
        "process.SimpleMemoryCheck",
        "customTypeCms.Service('SimpleMemoryCheck', jobReportOutputOnly=cms.untracked.bool(True))"
    )

    # tweak !
    psetTweakJson = "PSetTweak.json"
    tweak.persist(psetTweakJson, formatting='simplejson')

    procScript = "edm_pset_tweak.py"
    pklOut = pklIn + '-tweaked'
    # we always create untracked psets in our tweaks
    cmd = "%s --input_pkl %s --output_pkl %s --json %s --create_untracked_psets" % (
        procScript, pklIn, pklOut, psetTweakJson)
    commandLines = createTweakingCommandLines(cmd, pklIn, pklOut)

    # there are a few more things to do which require running different EDM/CMSSW commands
    #1. enable LazyDownload of LHE files (if needed)
    if opts.lheInputFiles:
        pklOut = pklIn + '-lazy'
        procScript = "cmssw_enable_lazy_download.py"
        cmd = "%s --input_pkl %s --output_pkl %s" % (procScript, pklIn, pklOut)
        moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
        commandLines += moreLines

    #2. make sure random seeds are initialized
    pklOut = pklIn + '-seeds'
    procScript = "cmssw_handle_random_seeds.py"
    cmd = "%s --input_pkl %s --output_pkl %s --seeding dummy" % (procScript,
                                                                 pklIn, pklOut)
    moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
    commandLines += moreLines

    #3. make sure that process.maxEvents.input is propagated to Producers, see:
    # https://github.com/dmwm/WMCore/blob/85d6d423f0a85fdedf78b65ca8b7b81af9263789/src/python/WMCore/WMRuntime/Scripts/SetupCMSSWPset.py#L448-L465
    pklOut = pklIn + '-nEvents'
    procScript = 'cmssw_handle_nEvents.py'
    cmd = "%s --input_pkl %s --output_pkl %s" % (procScript, pklIn, pklOut)
    moreLines = createTweakingCommandLines(cmd, pklIn, pklOut)
    commandLines += moreLines

    return commandLines
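
As a worked example of the event arithmetic above: CRAB hands the job an inclusive [firstEvent, lastEvent] range, while cmsRun wants a starting event plus a count. With illustrative values:

# firstEvent=101, lastEvent=200 is an inclusive range of 100 events
firstEvent, lastEvent = 101, 200
maxEvents = lastEvent - firstEvent + 1
assert maxEvents == 100
# if either bound is missing, the run/lumi mask drives the selection and
# maxEvents is set to -1, which tells cmsRun to process everything it is given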