Example #1
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')
                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(
                    collectionName, filesetName, owner, group)
            except Exception, ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return
Example #2
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents   = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName  = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL       = kwargs.get('couchURL')
                couchDB        = kwargs.get('couchDB')
                filesetName    = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner          = kwargs.get('owner')
                group          = kwargs.get('group')
                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(collectionName, filesetName, owner, group)
            except Exception, ex:
                msg =  "Exception while trying to load goodRunList\n"
                msg +=  "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return
Example #3
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []
        deterministicPileup = kwargs.get('deterministicPileup', False)

        if eventsPerJob <= 0 or eventsPerLumi <= 0:
            msg = "events_per_job and events_per_lumi must be positive. Their values are: "
            msg += "events_per_job: %d, events_per_lumi: %d" % (eventsPerJob, eventsPerLumi)
            raise RuntimeError(msg)

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
            self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
            logging.info('Creating jobs in DeterministicPileup mode for %s',
                         self.subscription.workflowName())

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                logging.info('Loading ACDC info for collectionName: %s, with filesetName: %s', collectionName,
                             filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return

        totalJobs = 0

        locationDict = self.sortByLocation()
        for location in locationDict:
            self.newGroup()
            fileList = locationDict[location]
            getRunLumiInformation = False
            for f in fileList:
                if f['lfn'].startswith("MCFakeFile"):
                    # We have one MCFakeFile, then it needs run information
                    getRunLumiInformation = True
                    break
            if getRunLumiInformation:
                if self.package == 'WMCore.WMBS':
                    loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
                    fileLumis = loadRunLumi.execute(files=fileList)
                    if not fileLumis:
                        logging.warning("Empty fileLumis dict for workflow %s, subs %s.",
                                        self.subscription.workflowName(), self.subscription['id'])
                    for f in fileList:
                        lumiDict = fileLumis.get(f['id'], {})
                        for run in lumiDict:
                            f.addRun(run=Run(run, *lumiDict[run]))

            for f in fileList:
                currentEvent = f['first_event']
                eventsInFile = f['events']
                runs = list(f['runs'])
                # We got the runs, clean the file.
                f['runs'] = set()

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if acdcFileList:
                    totalJobs = self.createACDCJobs(f, acdcFileList, timePerEvent,
                                                    sizePerEvent, memoryRequirement,
                                                    lheInput, eventsPerJob, eventsPerLumi,
                                                    deterministicPileup, totalJobs)
                    continue
                if not f['lfn'].startswith("MCFakeFile"):
                    # there might be files with 0 event that still have to be processed
                    if eventsInFile == 0:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        # Do not set LastEvent
                        self.currentJob["mask"].setMaxAndSkipEvents(None, currentEvent)
                        self.currentJob.addResourceEstimates(jobTime=0,
                                                             memory=memoryRequirement,
                                                             disk=0)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        totalJobs += 1
                        logging.info("Job created for 0-event input file with %s", self.currentJob)
                    # Very very uncommon in production, but it has real input dataset
                    while eventsInFile:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        if eventsInFile >= eventsPerJob:
                            jobTime = eventsPerJob * timePerEvent
                            diskRequired = eventsPerJob * sizePerEvent
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob - 1, currentEvent)
                        else:
                            jobTime = eventsInFile * timePerEvent
                            diskRequired = eventsInFile * sizePerEvent
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile - 1, currentEvent)
                            eventsInFile = eventsPerJob
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                        eventsInFile -= eventsPerJob
                        currentEvent += eventsPerJob
                        totalJobs += 1
                        logging.debug("Job created for real input with %s", self.currentJob)
                else:
                    # This assumes there's only one run which is the case for MC
                    lumis = runs[0].lumis
                    (firstLumi, lastLumi) = (min(lumis), max(lumis))
                    currentLumi = firstLumi
                    lumisPerJob = int(ceil(float(eventsPerJob) / eventsPerLumi))

                    while eventsInFile:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        self.currentJob.addBaggageParameter("lheInputFiles", lheInput)

                        # Limit the number of events to a unsigned 32bit int
                        if (currentEvent + eventsPerJob - 1) > (2 ** 32 - 1) and \
                                        (currentEvent + eventsInFile) > (2 ** 32 - 1):
                            currentEvent = 1

                        if eventsInFile >= eventsPerJob:
                            jobTime = eventsPerJob * timePerEvent
                            diskRequired = eventsPerJob * sizePerEvent
                            # Alan on 16/Apr/2019: inclusiveMask must be a real inclusiveMask, thus
                            # FirstEvent/FirstLumi and LastEvent/LastLumi are also processed by the job
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob - 1, currentEvent)
                            self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob - 1, currentLumi)
                        else:
                            jobTime = eventsInFile * timePerEvent
                            diskRequired = eventsInFile * sizePerEvent
                            lumisPerJob = int(ceil(float(eventsInFile) / eventsPerLumi))
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile - 1, currentEvent)
                            self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob - 1, currentLumi)
                            eventsInFile = eventsPerJob

                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                        eventsInFile -= eventsPerJob
                        currentEvent += eventsPerJob
                        currentLumi += lumisPerJob
                        totalJobs += 1
                        logging.info("Job created with mask: %s", self.currentJob['mask'])

        return
Example #4
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []
        deterministicPileup = kwargs.get('deterministicPileup', False)

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
            self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
            logging.info('Creating %d jobs in DeterministicPileup mode', self.nJobs)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')
                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return

        totalJobs = 0

        locationDict = self.sortByLocation()
        for location in locationDict:
            self.newGroup()
            fileList = locationDict[location]
            getRunLumiInformation = False
            for f in fileList:
                if f['lfn'].startswith("MCFakeFile"):
                    # We have one MCFakeFile, then it needs run information
                    getRunLumiInformation = True
                    break
            if getRunLumiInformation:
                if self.package == 'WMCore.WMBS':
                    loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
                    fileLumis = loadRunLumi.execute(files=fileList)
                    for f in fileList:
                        lumiDict = fileLumis.get(f['id'], {})
                        for run in lumiDict.keys():
                            f.addRun(run=Run(run, *lumiDict[run]))

            for f in fileList:
                currentEvent = f['first_event']
                eventsInFile = f['events']
                runs = list(f['runs'])
                # We got the runs, clean the file.
                f['runs'] = set()

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if acdcFileList:
                    if f['lfn'] in [x['lfn'] for x in acdcFileList]:
                        totalJobs = self.createACDCJobs(f, acdcFileList, timePerEvent,
                                                        sizePerEvent, memoryRequirement,
                                                        lheInput, eventsPerJob, eventsPerLumi,
                                                        deterministicPileup, totalJobs)
                    continue
                if not f['lfn'].startswith("MCFakeFile"):
                    # Very very uncommon, but it has real input dataset
                    if eventsInFile >= eventsPerJob:
                        while currentEvent < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            if eventsPerJob + currentEvent < eventsInFile:
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob, currentEvent)
                            else:
                                jobTime = (eventsInFile - currentEvent) * timePerEvent
                                diskRequired = (eventsInFile - currentEvent) * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(None,
                                                                            currentEvent)
                            self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                                 memory=memoryRequirement,
                                                                 disk=diskRequired)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                            logging.debug("Job created for real input with %s", self.currentJob)
                            currentEvent += eventsPerJob
                            totalJobs += 1
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        logging.debug("Last job created for real input with %s", self.currentJob)
                        totalJobs += 1
                else:
                    # This assumes there's only one run which is the case for MC
                    lumis = runs[0].lumis
                    (firstLumi, lastLumi) = (min(lumis), max(lumis))
                    currentLumi = firstLumi
                    totalEvents = 0
                    if eventsInFile >= eventsPerJob:
                        while totalEvents < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            self.currentJob.addBaggageParameter("lheInputFiles", lheInput)
                            lumisPerJob = int(ceil(float(eventsPerJob)
                                                   / eventsPerLumi))
                            # Limit the number of events to a unsigned 32bit int
                            eventsRemaining = eventsInFile - totalEvents
                            if (currentEvent + eventsPerJob - 1) > (2 ** 32 - 1) and (
                                    currentEvent + eventsRemaining - 1) > (2 ** 32 - 1):
                                currentEvent = 1
                            if eventsRemaining > eventsPerJob:
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob,
                                                                            currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob,
                                                                           currentLumi)
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                            else:
                                jobTime = eventsRemaining * timePerEvent
                                diskRequired = eventsRemaining * sizePerEvent
                                lumisPerJob = int(ceil(float(eventsRemaining) / eventsPerLumi))
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsRemaining,
                                                                            currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob,
                                                                           currentLumi)

                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                            currentLumi += lumisPerJob
                            currentEvent += eventsPerJob
                            totalEvents += eventsPerJob
                            totalJobs += 1
                            self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                                 memory=memoryRequirement,
                                                                 disk=diskRequired)
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        # For MC we use firstEvent instead of skipEvents so set it to 1
                        # We must check for events going over 2**32 - 1 here too
                        if (eventsInFile + currentEvent - 1) > (2 ** 32 - 1):
                            currentEvent = 1
                        self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile,
                                                                    currentEvent)
                        self.currentJob["mask"].setMaxAndSkipLumis(lastLumi -
                                                                   currentLumi + 1, currentLumi)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        totalJobs += 1
Example #5
0
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []
        deterministicPileup = kwargs.get('deterministicPileup', False)

        if eventsPerJob <= 0 or eventsPerLumi <= 0:
            msg = "events_per_job and events_per_lumi must be positive. Their values are: "
            msg += "events_per_job: %d, events_per_lumi: %d" % (eventsPerJob, eventsPerLumi)
            raise RuntimeError(msg)

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
            self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
            logging.info('Creating jobs in DeterministicPileup mode for %s',
                         self.subscription.workflowName())

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                logging.info('Loading ACDC info for collectionName: %s, with filesetName: %s', collectionName,
                             filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return

        totalJobs = 0

        locationDict = self.sortByLocation()
        for location in locationDict:
            self.newGroup()
            fileList = locationDict[location]
            getRunLumiInformation = False
            for f in fileList:
                if f['lfn'].startswith("MCFakeFile"):
                    # We have one MCFakeFile, then it needs run information
                    getRunLumiInformation = True
                    break
            if getRunLumiInformation:
                if self.package == 'WMCore.WMBS':
                    loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
                    fileLumis = loadRunLumi.execute(files=fileList)
                    if not fileLumis:
                        logging.warning("Empty fileLumis dict for workflow %s, subs %s.",
                                        self.subscription.workflowName(), self.subscription['id'])
                    for f in fileList:
                        lumiDict = fileLumis.get(f['id'], {})
                        for run in lumiDict.keys():
                            f.addRun(run=Run(run, *lumiDict[run]))

            for f in fileList:
                currentEvent = f['first_event']
                eventsInFile = f['events']
                runs = list(f['runs'])
                # We got the runs, clean the file.
                f['runs'] = set()

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if acdcFileList:
                    totalJobs = self.createACDCJobs(f, acdcFileList, timePerEvent,
                                                    sizePerEvent, memoryRequirement,
                                                    lheInput, eventsPerJob, eventsPerLumi,
                                                    deterministicPileup, totalJobs)
                    continue
                if not f['lfn'].startswith("MCFakeFile"):
                    # there might be files with 0 event that still have to be processed
                    if eventsInFile == 0:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        # Do not set LastEvent
                        self.currentJob["mask"].setMaxAndSkipEvents(None, currentEvent)
                        self.currentJob.addResourceEstimates(jobTime=0,
                                                             memory=memoryRequirement,
                                                             disk=0)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        totalJobs += 1
                        logging.info("Job created for 0-event input file with %s", self.currentJob)
                    # Very very uncommon in production, but it has real input dataset
                    while eventsInFile:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        if eventsInFile >= eventsPerJob:
                            jobTime = eventsPerJob * timePerEvent
                            diskRequired = eventsPerJob * sizePerEvent
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob - 1, currentEvent)
                        else:
                            jobTime = eventsInFile * timePerEvent
                            diskRequired = eventsInFile * sizePerEvent
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile - 1, currentEvent)
                            eventsInFile = eventsPerJob
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                        eventsInFile -= eventsPerJob
                        currentEvent += eventsPerJob
                        totalJobs += 1
                        logging.debug("Job created for real input with %s", self.currentJob)
                else:
                    # This assumes there's only one run which is the case for MC
                    lumis = runs[0].lumis
                    (firstLumi, lastLumi) = (min(lumis), max(lumis))
                    currentLumi = firstLumi
                    lumisPerJob = int(ceil(float(eventsPerJob) / eventsPerLumi))

                    while eventsInFile:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        self.currentJob.addBaggageParameter("lheInputFiles", lheInput)

                        # Limit the number of events to a unsigned 32bit int
                        if (currentEvent + eventsPerJob - 1) > (2 ** 32 - 1) and \
                                        (currentEvent + eventsInFile) > (2 ** 32 - 1):
                            currentEvent = 1

                        if eventsInFile >= eventsPerJob:
                            jobTime = eventsPerJob * timePerEvent
                            diskRequired = eventsPerJob * sizePerEvent
                            # Alan on 16/Apr/2019: inclusiveMask must be a real inclusiveMask, thus
                            # FirstEvent/FirstLumi and LastEvent/LastLumi are also processed by the job
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob - 1, currentEvent)
                            self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob - 1, currentLumi)
                        else:
                            jobTime = eventsInFile * timePerEvent
                            diskRequired = eventsInFile * sizePerEvent
                            lumisPerJob = int(ceil(float(eventsInFile) / eventsPerLumi))
                            self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile - 1, currentEvent)
                            self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob - 1, currentLumi)
                            eventsInFile = eventsPerJob

                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                        eventsInFile -= eventsPerJob
                        currentEvent += eventsPerJob
                        currentLumi += lumisPerJob
                        totalJobs += 1
                        logging.info("Job created with mask: %s", self.currentJob['mask'])

        return