Code Example #1
File: WMBSHelper.py Project: ticoann/WMCore
    def validFiles(self, files):
        """Apply run white/black list and return valid files"""
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()

        results = []
        for f in files:
            if isinstance(f, str) or "LumiList" not in f:
                results.append(f)
                continue
            runs = set([x['RunNumber'] for x in f['LumiList']])
            if runWhiteList or runBlackList:
                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore file
                if not runs:
                    continue
            #if we have a lumi mask we have to check that at least one lumi in the file is valid
            hasGoodLumi = False
            for lumi in f['LumiList']:
                #consider the runs after applying the run white/black lists
                if lumi['RunNumber'] in runs and \
                    isGoodLumi(self.topLevelTask.getLumiMask(), lumi['RunNumber'], lumi['LumiSectionNumber']):
                        hasGoodLumi = True
                        break
            #if no good lumi is found continue
            if not hasGoodLumi:
                continue
            results.append(f)
        return results
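
The example above keeps a file only if at least one of its runs survives the run white/black lists and, when a lumi mask is set, at least one of its lumi sections passes that mask. Below is a minimal, self-contained sketch of the same filtering on plain dictionaries; the helper name filter_files, the lumi_mask layout and the sample data are illustrative and not part of WMCore.

def filter_files(files, run_whitelist=None, run_blacklist=None, lumi_mask=None):
    """Keep only files with at least one run/lumi surviving the filters."""
    run_whitelist = set(run_whitelist or [])
    run_blacklist = set(run_blacklist or [])
    lumi_mask = lumi_mask or {}   # assumed layout: {run: [[firstLumi, lastLumi], ...]}

    def lumi_is_good(run, lumi):
        # With no mask every lumi is good; otherwise it must fall inside a listed range.
        if not lumi_mask:
            return True
        return any(lo <= lumi <= hi for lo, hi in lumi_mask.get(run, []))

    kept = []
    for f in files:
        runs = {entry['RunNumber'] for entry in f['LumiList']}
        runs -= run_blacklist                  # apply the blacklist
        if run_whitelist:
            runs &= run_whitelist              # whitelist keeps only listed runs
        if not runs:
            continue                           # nothing left to run on, drop the file
        if any(entry['RunNumber'] in runs and
               lumi_is_good(entry['RunNumber'], entry['LumiSectionNumber'])
               for entry in f['LumiList']):
            kept.append(f)                     # at least one good lumi found
    return kept

sample = [{'LumiList': [{'RunNumber': 1, 'LumiSectionNumber': 10}]},
          {'LumiList': [{'RunNumber': 2, 'LumiSectionNumber': 5}]}]
print(filter_files(sample, run_blacklist=[2]))   # only the run-1 file survives
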
Code Example #2
    def validFiles(self, files):
        """Apply run white/black list and return valid files"""
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()

        results = []
        for f in files:
            if isinstance(f, str) or "LumiList" not in f:
                results.append(f)
                continue
            runs = set([x['RunNumber'] for x in f['LumiList']])
            if runWhiteList or runBlackList:
                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore file
                if not runs:
                    continue
            #if we have a lumi mask we have to check that at least one lumi in the file is valid
            hasGoodLumi = False
            for lumi in f['LumiList']:
                #consider the runs after applying the run white/black lists
                if lumi['RunNumber'] in runs and \
                    isGoodLumi(self.topLevelTask.getLumiMask(), lumi['RunNumber'], lumi['LumiSectionNumber']):
                    hasGoodLumi = True
                    break
            #if no good lumi is found continue
            if not hasGoodLumi:
                continue
            results.append(f)
        return results
Code Example #3
class EventAwareLumiBased(JobFactory):
    """
    Split jobs by lumis taking into account events per lumi
    """

    locations = []

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        eventLimit = int(kwargs.get('max_events_per_lumi', 20000))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC = bool(kwargs.get('ignore_acdc_except', False))
        collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName,
                                                   owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg += "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg += "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return

        lDict = self.sortByLocation()
        locationDict = {}

        # First we need to load the data
        if self.package == 'WMCore.WMBS':
            loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")

        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.package == 'WMCore.WMBS':
                fileLumis = loadRunLumi.execute(files=lDict[key])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run=Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                #Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = float(f['events']) / f['lumiCount']
                else:
                    #No lumis in the file, ignore it
                    continue
                newlist.append(f)

            locationDict[key] = sorted(newlist,
                                       key=operator.itemgetter('lowestRun'))

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        lastRun = None
        lumisInJob = 0
        currentJobAvgEventCount = 0
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                updateSplitOnJobStop = False
                failNextJob = False
                #If the number of events per lumi is higher than the limit
                #and it's only one lumi then ditch that lumi
                if f['avgEvtsPerLumi'] > eventLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    #Check the average number of events per lumi in this file
                    #Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        #If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        #Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    #Analyze how many events this job already has
                    #Check how many we want as target, include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(
                        avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(
                            math.floor(
                                float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if not isGoodLumi(goodRunList, run=run.run, lumi=lumi):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            firstLumi = None
                            lastLumi = None

                        if firstLumi == None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=lastRun, lumis=[firstLumi, lastLumi])
                            msg = None
                            if failNextJob:
                                msg = "File %s has too many events (%d) in %d lumi(s)" % (
                                    f['lfn'], f['events'], f['lumiCount'])
                            self.newJob(name=self.getJobName(length=totalJobs),
                                        failedJob=failNextJob,
                                        failedReason=msg)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                #Then we were carrying from a previous file
                                #Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(
                                        avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)),
                                                      1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run

                        if self.currentJob and not f in self.currentJob[
                                'input_files']:
                            self.currentJob.addFile(f)

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(
                            run=run.run, lumis=[firstLumi, lastLumi])
                        firstLumi = None
                        lastLumi = None

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * min(
                        lumisInJob, f['lumiCount'])

        return
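
A note on the splitting arithmetic used above: when a job is cut at a file boundary, the number of lumis per job is derived from the target events_per_job and the file's average events per lumi, flooring the ratio and never going below one lumi. A small sketch of that calculation follows; the function name and the sample numbers are illustrative.

import math

def lumis_per_job(avg_events_per_job, avg_events_per_lumi, lumi_count):
    """Floor the events ratio, never going below one lumi per job."""
    if not avg_events_per_lumi:
        # Zero-event file: take every lumi rather than divide by zero.
        return lumi_count
    ratio = float(avg_events_per_job) / avg_events_per_lumi
    return max(int(math.floor(ratio)), 1)

print(lumis_per_job(5000, 250.0, 40))    # -> 20 lumis per job
print(lumis_per_job(5000, 8000.0, 1))    # -> 1 (single oversized lumi)
print(lumis_per_job(5000, 0, 12))        # -> 12 (zero-event file)
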
Code Example #4
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split up all the available files such that each job will process a
        maximum of 'files_per_job'.  If the 'files_per_job' parameters is not
        passed in jobs will process a maximum of 10 files.
        """

        filesPerJob = int(kwargs.get("files_per_job", 10))
        jobsPerGroup = int(kwargs.get("jobs_per_group", 0))
        totalFiles = int(kwargs.get("total_files", 0))
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        runBoundaries = kwargs.get("respect_run_boundaries", False)
        getParents = kwargs.get("include_parents", False)
        filesInJob = 0
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        #Get a dictionary of sites, files
        lDict = self.sortByLocation()
        locationDict = {}

        for key in lDict:
            newlist = []
            for f in lDict[key]:
                if runs and lumis:
                    ## Skip this file if it has no runs.
                    if len(f['runs']) == 0:
                        continue
                    f['lumiCount'] = 0
                    f['runs'] = sorted(f['runs'])
                    for run in f['runs']:
                        run.lumis.sort()
                        f['lumiCount'] += len(run.lumis)
                    f['lowestRun'] = f['runs'][0]
                    ## Skip this file if it has no lumis.
                    if f['lumiCount'] == 0:
                        continue
                    ## Do average event per lumi calculation.
                    f['avgEvtsPerLumi'] = round(
                        float(f['events']) / f['lumiCount'])
                newlist.append(f)
            locationDict[key] = sorted(newlist, key=lambda f: f['lfn'])

        ## Make a list with all the files, sorting them by LFN. Remove from the list all
        ## the files filtered out by the lumi-mask (if there is one).
        files = []
        for filesPerLocSet in locationDict.values():
            for f in filesPerLocSet:
                files.append(f)
        if len(files):
            files = sorted(files, key=lambda f: f['lfn'])
            if runs and lumis:
                skippedFiles = []
                for f in files:
                    skipFile = True
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                continue
                            skipFile = False
                    if skipFile:
                        skippedFiles.append(f)
                for f in skippedFiles:
                    files.remove(f)

        ## Keep only the first totalFiles files. Remove the other files from the locationDict.
        if totalFiles > 0 and totalFiles < len(files):
            removedFiles = files[totalFiles:]
            files = files[:totalFiles]
            for f in removedFiles:
                for locSet in locationDict.keys():
                    if f in locationDict[locSet]:
                        locationDict[locSet].remove(f)

        for locSet in locationDict.keys():
            #Now we have all the files in a certain location set
            fileList = locationDict[locSet]
            filesInJob = 0
            jobsInGroup = 0
            self.newGroup()
            if len(fileList) == 0:
                continue
            jobRun = None
            for f in fileList:
                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)
                fileRun = f.get('minrun', None)
                createNewJob = False
                if filesInJob == 0 or filesInJob == filesPerJob or (
                        runBoundaries and fileRun != jobRun):
                    createNewJob = True
                if runs and lumis:
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        firstLumi = None
                        lastLumi = None
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                if firstLumi != None and lastLumi != None:
                                    self.currentJob['mask'].addRunAndLumis(
                                        run=run.run,
                                        lumis=[firstLumi, lastLumi])
                                    addedEvents = ((lastLumi - firstLumi + 1) *
                                                   f['avgEvtsPerLumi'])
                                    runAddedTime = addedEvents * timePerEvent
                                    runAddedSize = addedEvents * sizePerEvent
                                    self.currentJob.addResourceEstimates(
                                        jobTime=runAddedTime,
                                        disk=runAddedSize)
                                    firstLumi = None
                                    lastLumi = None
                                continue
                            if lastLumi != None and lumi != lastLumi + 1:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            if createNewJob:
                                if jobsPerGroup:
                                    if jobsInGroup > jobsPerGroup:
                                        self.newGroup()
                                        jobsInGroup = 0
                                self.newJob(name=self.getJobName())
                                self.currentJob.addResourceEstimates(
                                    memory=memoryRequirement)
                                filesInJob = 0
                                jobsInGroup += 1
                                jobRun = fileRun
                                createNewJob = False
                                self.currentJob.addFile(f)
                                filesInJob += 1
                            if firstLumi == None:
                                firstLumi = lumi
                            lastLumi = lumi
                            if self.currentJob and not f in self.currentJob[
                                    'input_files']:
                                self.currentJob.addFile(f)
                                filesInJob += 1
                        if firstLumi != None and lastLumi != None:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) *
                                           f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(
                                jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None
                else:
                    if createNewJob:
                        if jobsPerGroup:
                            if jobsInGroup > jobsPerGroup:
                                self.newGroup()
                                jobsInGroup = 0
                        self.newJob(name=self.getJobName())
                        self.currentJob.addResourceEstimates(
                            memory=memoryRequirement)
                        filesInJob = 0
                        jobsInGroup += 1
                        jobRun = fileRun
                    self.currentJob.addFile(f)
                    filesInJob += 1
                    fileTime = f['events'] * timePerEvent
                    fileSize = f['events'] * sizePerEvent
                    self.currentJob.addResourceEstimates(jobTime=fileTime,
                                                         disk=fileSize)

        return
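
The firstLumi/lastLumi bookkeeping above collapses consecutive good lumis into contiguous ranges before they are added to the job mask; the chain is closed whenever a bad lumi or a gap in the numbering appears. A standalone sketch of that range building is shown below; the helper is illustrative and not part of WMCore.

def contiguous_ranges(lumis):
    """Turn a sorted list of lumi numbers into [first, last] pairs."""
    ranges = []
    first = last = None
    for lumi in lumis:
        if last is not None and lumi != last + 1:
            ranges.append([first, last])   # numbering gap: close the current range
            first = None
        if first is None:
            first = lumi                   # start a new range
        last = lumi
    if first is not None and last is not None:
        ranges.append([first, last])       # close the final range
    return ranges

print(contiguous_ranges([1, 2, 3, 7, 8, 11]))   # [[1, 3], [7, 8], [11, 11]]
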
Code Example #5
File: Harvest.py Project: PerilousApricot/WMCore
    def createJobsLocationWise(self, fileset, endOfRun, dqmHarvestUnit, lumiMask, goodRunList):

        myThread = threading.currentThread()
        fileset.loadData(parentage=0)
        allFiles = fileset.getFiles()

        # sort by location and run
        locationDict = {}
        runDict = {}
        for fileInfo in allFiles:

            locSet = frozenset(fileInfo['locations'])
            runSet = fileInfo.getRuns()

            if len(locSet) == 0:
                logging.error("File %s has no locations!", fileInfo['lfn'])
            if len(runSet) == 0:
                logging.error("File %s has no run information!", fileInfo['lfn'])

            # Populate a dictionary with [location][run] so we can split jobs according to those different combinations
            if locSet not in locationDict.keys():
                locationDict[locSet] = {}

            fileInfo['runs'] = set()
            # Handle jobs with run whitelist/blacklist
            if goodRunList:
                runDict[fileInfo['lfn']] = set()
                for run in runSet:
                    if run.run in goodRunList:
                        runDict[fileInfo['lfn']].add(run)
                        if run.run in locationDict[locSet].keys():
                            locationDict[locSet][run.run].append(fileInfo)
                        else:
                            locationDict[locSet][run.run] = [fileInfo]
            elif lumiMask:
                # it has lumiMask, thus we consider only good run/lumis
                newRunSet = []
                for run in runSet:
                    if not isGoodRun(lumiMask, run.run):
                        continue
                    # then loop over lumis
                    maskedLumis = []
                    for lumi in run.lumis:
                        if not isGoodLumi(lumiMask, run.run, lumi):
                            continue
                        maskedLumis.append(lumi)

                    if not maskedLumis:
                        continue
                    maskedRun = Run(run.run, *maskedLumis)
                    newRunSet.append(maskedRun)

                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]
                if newRunSet:
                    runDict[fileInfo['lfn']] = newRunSet
            else:
                # no LumiList and no run white or black list
                runDict[fileInfo['lfn']] = runSet
                for run in runSet:
                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]

        # create separate jobs for different locations
        self.newGroup()
        self.jobCount = 0
        baseName = makeUUID()
        self.newGroup()

        if endOfRun:
            harvestType = "EndOfRun"
        else:
            harvestType = "Periodic"

        for location in locationDict.keys():

            if dqmHarvestUnit == "byRun":
                self.createJobByRun(locationDict, location, baseName, harvestType, runDict, endOfRun)
            else:
                self.createMultiRunJob(locationDict, location, baseName, harvestType, runDict, endOfRun)

        return
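
createJobsLocationWise() above buckets files by the combination of location set and run number so that a harvesting job can later be created per combination. A compact sketch of that bookkeeping on plain dictionaries follows; the field names and sample data are illustrative, not the WMBS file objects used above.

def bucket_by_location_and_run(files):
    """files: iterable of dicts with 'lfn', 'locations' and 'runs' keys."""
    buckets = {}
    for info in files:
        loc_set = frozenset(info['locations'])        # one bucket per location set
        for run in info['runs']:
            buckets.setdefault(loc_set, {}).setdefault(run, []).append(info['lfn'])
    return buckets

sample = [{'lfn': '/store/a.root', 'locations': ['T2_CH_CERN'], 'runs': [100, 101]},
          {'lfn': '/store/b.root', 'locations': ['T2_CH_CERN'], 'runs': [101]}]
print(bucket_by_location_and_run(sample))
# {frozenset({'T2_CH_CERN'}): {100: ['/store/a.root'], 101: ['/store/a.root', '/store/b.root']}}
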
Code Example #6
File: FileBased.py Project: alexanderrichards/WMCore
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split up all the available files such that each job will process a
        maximum of 'files_per_job'.  If the 'files_per_job' parameters is not
        passed in jobs will process a maximum of 10 files.
        """

        filesPerJob   = int(kwargs.get("files_per_job", 10))
        jobsPerGroup  = int(kwargs.get("jobs_per_group", 0))
        totalFiles    = int(kwargs.get("total_files", 0))
        runs          = kwargs.get('runs', None)
        lumis         = kwargs.get('lumis', None)
        runBoundaries = kwargs.get("respect_run_boundaries", False)
        getParents    = kwargs.get("include_parents", False)
        filesInJob    = 0
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        #Get a dictionary of sites, files
        lDict = self.sortByLocation()
        locationDict = {}

        for key in lDict:
            newlist = []
            for f in lDict[key]:
                if runs and lumis:
                    ## Skip this file if it has no runs.
                    if len(f['runs']) == 0:
                        continue
                    f['lumiCount'] = 0
                    f['runs'] = sorted(f['runs'])
                    for run in f['runs']:
                        run.lumis.sort()
                        f['lumiCount'] += len(run.lumis)
                    f['lowestRun'] = f['runs'][0]
                    ## Skip this file if it has no lumis.
                    if f['lumiCount'] == 0:
                        continue
                    ## Do average event per lumi calculation.
                    f['avgEvtsPerLumi'] = round(float(f['events']) / f['lumiCount'])
                newlist.append(f)
            locationDict[key] = sorted(newlist, key = lambda f: f['lfn'])

        ## Make a list with all the files, sorting them by LFN. Remove from the list all
        ## the files filtered out by the lumi-mask (if there is one).
        files = []
        for filesPerLocSet in locationDict.values():
            for f in filesPerLocSet:
                files.append(f)
        if len(files):
            files = sorted(files, key = lambda f: f['lfn'])
            if runs and lumis:
                skippedFiles = []
                for f in files:
                    skipFile = True
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                continue
                            skipFile = False
                    if skipFile:
                        skippedFiles.append(f)
                for f in skippedFiles:
                    files.remove(f)

        ## Keep only the first totalFiles files. Remove the other files from the locationDict.
        if totalFiles > 0 and totalFiles < len(files):
            removedFiles = files[totalFiles:]
            files = files[:totalFiles]
            for f in removedFiles:
                for locSet in locationDict.keys():
                    if f in locationDict[locSet]:
                        locationDict[locSet].remove(f)

        for locSet in locationDict.keys():
            #Now we have all the files in a certain location set
            fileList = locationDict[locSet]
            filesInJob  = 0
            jobsInGroup = 0
            self.newGroup()
            if len(fileList) == 0:
                continue
            jobRun = None
            for f in fileList:
                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)
                fileRun = f.get('minrun', None)
                createNewJob = False
                if filesInJob == 0 or filesInJob == filesPerJob or (runBoundaries and fileRun != jobRun):
                    createNewJob = True
                if runs and lumis:
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        firstLumi = None
                        lastLumi = None
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                if firstLumi != None and lastLumi != None:
                                    self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                                    addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                    runAddedTime = addedEvents * timePerEvent
                                    runAddedSize = addedEvents * sizePerEvent
                                    self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                    firstLumi = None
                                    lastLumi = None
                                continue
                            if lastLumi != None and lumi != lastLumi + 1:
                                self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            if createNewJob:
                                if jobsPerGroup:
                                    if jobsInGroup > jobsPerGroup:
                                        self.newGroup()
                                        jobsInGroup = 0
                                self.newJob(name = self.getJobName())
                                self.currentJob.addResourceEstimates(memory = memoryRequirement)
                                filesInJob = 0
                                jobsInGroup += 1
                                jobRun = fileRun
                                createNewJob = False
                                self.currentJob.addFile(f)
                                filesInJob += 1
                            if firstLumi == None:
                                firstLumi = lumi
                            lastLumi = lumi
                            if self.currentJob and not f in self.currentJob['input_files']:
                                self.currentJob.addFile(f)
                                filesInJob += 1
                        if firstLumi != None and lastLumi != None:
                            self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            firstLumi = None
                            lastLumi = None
                else:
                    if createNewJob:
                        if jobsPerGroup:
                            if jobsInGroup > jobsPerGroup:
                                self.newGroup()
                                jobsInGroup = 0
                        self.newJob(name = self.getJobName())
                        self.currentJob.addResourceEstimates(memory = memoryRequirement)
                        filesInJob = 0
                        jobsInGroup += 1
                        jobRun = fileRun
                    self.currentJob.addFile(f)
                    filesInJob += 1
                    fileTime = f['events'] * timePerEvent
                    fileSize = f['events'] * sizePerEvent
                    self.currentJob.addResourceEstimates(jobTime = fileTime, disk = fileSize)

        return
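
Whenever a lumi range is added to the mask in the example above, the job's resource estimates grow by the events covered by that range multiplied by the per-event time and size figures returned by getPerformanceParameters(). A minimal sketch of that arithmetic follows; the function and the numbers are illustrative.

def range_estimates(first_lumi, last_lumi, avg_events_per_lumi,
                    time_per_event, size_per_event):
    """Return (job time, disk) added by a [first_lumi, last_lumi] mask range."""
    events = (last_lumi - first_lumi + 1) * avg_events_per_lumi
    return events * time_per_event, events * size_per_event

job_time, disk = range_estimates(first_lumi=5, last_lumi=14,
                                 avg_events_per_lumi=200,
                                 time_per_event=12.0,     # seconds per event
                                 size_per_event=512.0)    # kB per event
print(job_time, disk)   # 24000.0 1024000.0
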
Code Example #7
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        jobLimit = int(kwargs.get('job_limit', 0))
        jobTimeLimit = int(
            kwargs.get('job_time_limit', self.defaultJobTimeLimit))
        totalEvents = int(kwargs.get('total_events', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', False))
        self.collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        deterministicPileup = kwargs.get('deterministicPileup', False)

        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))

        eventsPerLumiInDataset = 0

        if avgEventsPerJob <= 0:
            msg = "events_per_job parameter must be positive. Its value is: %d" % avgEventsPerJob
            raise RuntimeError(msg)

        if self.package == 'WMCore.WMBS':
            self.loadRunLumi = self.daoFactory(
                classname="Files.GetBulkRunLumi")
            if deterministicPileup:
                getJobNumber = self.daoFactory(
                    classname="Jobs.GetNumberOfJobsPerWorkflow")
                self.nJobs = getJobNumber.execute(
                    workflow=self.subscription.getWorkflow().id)
                logging.info(
                    'Creating jobs in DeterministicPileup mode for %s',
                    self.subscription.workflowName())

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if self.collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')

                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(self.collectionName,
                                                   filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList. "
                msg += "Refusing to create any jobs.\nDetails: %s" % str(ex)
                logging.exception(msg)
                return

        lDict = self.getFilesSortedByLocation(avgEventsPerJob)
        if not lDict:
            logging.info(
                "There are not enough events/files to be splitted. Trying again next cycle"
            )
            return

        locationDict = {}
        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.loadRunLumi:
                fileLumis = self.loadRunLumi.execute(files=lDict[key])
                if not fileLumis:
                    logging.warning(
                        "Empty fileLumis dict for workflow %s, subs %s.",
                        self.subscription.workflowName(),
                        self.subscription['id'])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run=Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(
                        float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)

            locationDict[key] = sorted(newlist,
                                       key=operator.itemgetter('lowestRun'))

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        lastRun = None
        lumisInJob = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        self.lumiChecker = LumiChecker(applyLumiCorrection)
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob = False
                # If estimated job time is higher the job time limit (condor limit)
                # and it's only one lumi then ditch that lumi
                timePerLumi = f['avgEvtsPerLumi'] * timePerEvent
                if timePerLumi > jobTimeLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    # Check the average number of events per lumi in this file
                    # Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        # If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        # Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    # Analyze how many events this job already has
                    # Check how many we want as target, include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(
                        avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(
                            math.floor(
                                float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if (not isGoodLumi(goodRunList, run=run.run, lumi=lumi)
                                or self.lumiChecker.isSplitLumi(
                                    run.run, lumi, f)):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) *
                                           f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(
                                jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi is None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=lastRun, lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has a single lumi %s, in run %s " % (
                                    f['lfn'], lumi, run.run)
                                msg += "with too many events %d and it woud take %d sec to run" \
                                       % (f['events'], timePerLumi)
                            self.lumiChecker.closeJob(self.currentJob)
                            self.newJob(name=self.getJobName(),
                                        failedJob=failNextJob,
                                        failedReason=msg)
                            if deterministicPileup:
                                skipEvents = (
                                    self.nJobs -
                                    1) * lumisPerJob * eventsPerLumiInDataset
                                self.currentJob.addBaggageParameter(
                                    "skipPileupEvents", skipEvents)
                            self.currentJob.addResourceEstimates(
                                memory=memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1
                            if jobLimit and totalJobs > jobLimit:
                                msg = "Job limit of {0} jobs exceeded.".format(
                                    jobLimit)
                                raise RuntimeError(msg)

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                # Then we were carrying from a previous file
                                # Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(
                                        avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)),
                                                      1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob[
                                'input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(
                            run=run.run, lumis=[firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) *
                                       f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(
                            jobTime=runAddedTime, disk=runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f[
                        'avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
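
The job_time_limit handling above fails a job up front when a file consists of a single lumi whose estimated wall time already exceeds the limit, since such a lumi cannot be split any further. A small sketch of that check follows; the names and numbers are illustrative.

def should_fail_single_lumi(avg_events_per_lumi, time_per_event,
                            lumi_count, job_time_limit):
    """True when the file's only lumi would already exceed the wall-time limit."""
    time_per_lumi = avg_events_per_lumi * time_per_event
    return time_per_lumi > job_time_limit and lumi_count == 1

print(should_fail_single_lumi(avg_events_per_lumi=50000, time_per_event=10.0,
                              lumi_count=1, job_time_limit=172800))   # True
print(should_fail_single_lumi(avg_events_per_lumi=200, time_per_event=10.0,
                              lumi_count=30, job_time_limit=172800))  # False
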
Code Example #8
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        eventLimit      = int(kwargs.get('max_events_per_lumi', 20000))
        totalEvents     = int(kwargs.get('total_events', 0))
        splitOnFile     = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC      = bool(kwargs.get('ignore_acdc_except', False))
        collectionName  = kwargs.get('collectionName', None)
        splitOnRun      = kwargs.get('splitOnRun', True)
        getParents      = kwargs.get('include_parents', False)
        runWhitelist    = kwargs.get('runWhitelist', [])
        runs            = kwargs.get('runs', None)
        lumis           = kwargs.get('lumis', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        deterministicPileup = kwargs.get('deterministicPileup', False)
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(workflow = self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL       = kwargs.get('couchURL')
                couchDB        = kwargs.get('couchDB')
                filesetName    = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner          = kwargs.get('owner')
                group          = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg =  "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg +=  "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg +=  "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return

        lDict = self.sortByLocation()
        locationDict = {}

        # First we need to load the data
        if self.package == 'WMCore.WMBS':
            loadRunLumi = self.daoFactory(classname = "Files.GetBulkRunLumi")

        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.package == 'WMCore.WMBS':
                fileLumis = loadRunLumi.execute(files = lDict[key])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run = Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                #Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events'])/f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    #No lumis in the file, ignore it
                    continue
                newlist.append(f)


            locationDict[key] = sorted(newlist, key=operator.itemgetter('lowestRun'))

        totalJobs      = 0
        lastLumi       = None
        firstLumi      = None
        lastRun        = None
        lumisInJob     = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob          = False
                #If the number of events per lumi is higher than the limit
                #and it's only one lumi then ditch that lumi
                if f['avgEvtsPerLumi'] > eventLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    #Check the average number of events per lumi in this file
                    #Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        #If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        #Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    #Analyze how many events this job already has
                    #Check how many we want as target, include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(math.floor(float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList = goodRunList, run = run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if not isGoodLumi(goodRunList, run = run.run, lumi = lumi):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                   lumis = [firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi == None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run = lastRun,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has too many events (%d) in %d lumi(s)" % (f['lfn'],
                                                                                          f['events'],
                                                                                          f['lumiCount'])
                            self.newJob(name = self.getJobName(), failedJob = failNextJob,
                                        failedReason = msg)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset)
                            self.currentJob.addResourceEstimates(memory = memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                #Then we were carrying from a previous file
                                #Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)), 1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                               lumis = [firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        return
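
The splitter above sizes jobs by converting the target events per job into a number of lumi sections, using the file's average events per lumi. A minimal standalone sketch of that sizing rule; the helper name and the example numbers are illustrative and not part of WMCore:

import math

def lumis_per_job(avg_events_per_job, avg_events_per_lumi, lumi_count):
    # Illustrative helper (not WMCore API): mirrors the lumisPerJob sizing
    # used above when splitting on file boundaries.
    if avg_events_per_lumi:
        ratio = float(avg_events_per_job) / avg_events_per_lumi
        return max(int(math.floor(ratio)), 1)
    # Zero-event file: keep every lumi of the file in a single job
    return lumi_count

# A 5000-event target with ~400 events per lumi gives 12 lumis per job
print(lumis_per_job(5000, 400, 30))   # -> 12
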
Code Example #9
    def createJobsLocationWise(self, fileset, endOfRun, dqmHarvestUnit,
                               lumiMask, goodRunList):

        myThread = threading.currentThread()
        fileset.loadData(parentage=0)
        allFiles = fileset.getFiles()

        # sort by location and run
        locationDict = {}
        runDict = {}
        for fileInfo in allFiles:

            locSet = frozenset(fileInfo['locations'])
            runSet = fileInfo.getRuns()

            if len(locSet) == 0:
                logging.error("File %s has no locations!", fileInfo['lfn'])
            if len(runSet) == 0:
                logging.error("File %s has no run information!",
                              fileInfo['lfn'])

            # Populate a dictionary with [location][run] so we can split jobs according to those different combinations
            if locSet not in locationDict.keys():
                locationDict[locSet] = {}

            fileInfo['runs'] = set()
            # Handle jobs with run whitelist/blacklist
            if goodRunList:
                runDict[fileInfo['lfn']] = set()
                for run in runSet:
                    if run.run in goodRunList:
                        runDict[fileInfo['lfn']].add(run)
                        if run.run in locationDict[locSet].keys():
                            locationDict[locSet][run.run].append(fileInfo)
                        else:
                            locationDict[locSet][run.run] = [fileInfo]
            elif lumiMask:
                # it has a lumiMask, thus we consider only good runs/lumis
                newRunSet = []
                for run in runSet:
                    if not isGoodRun(lumiMask, run.run):
                        continue
                    # then loop over lumis
                    maskedLumis = []
                    for lumi in run.lumis:
                        if not isGoodLumi(lumiMask, run.run, lumi):
                            continue
                        maskedLumis.append(lumi)

                    if not maskedLumis:
                        continue
                    maskedRun = Run(run.run, *maskedLumis)
                    newRunSet.append(maskedRun)

                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]
                if newRunSet:
                    runDict[fileInfo['lfn']] = newRunSet
            else:
                # no LumiList and no run white or black list
                runDict[fileInfo['lfn']] = runSet
                for run in runSet:
                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]

        # create separate jobs for different locations
        self.newGroup()
        self.jobCount = 0
        baseName = makeUUID()
        self.newGroup()

        if endOfRun:
            harvestType = "EndOfRun"
        else:
            harvestType = "Periodic"

        for location in locationDict.keys():

            if dqmHarvestUnit == "byRun":
                self.createJobByRun(locationDict, location, baseName,
                                    harvestType, runDict, endOfRun)
            else:
                self.createMultiRunJob(locationDict, location, baseName,
                                       harvestType, runDict, endOfRun)

        return
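
The harvesting splitter above first builds a nested [location][run] dictionary so that jobs can later be created per location/run combination. A self-contained sketch of that grouping using plain dictionaries; the file records and site name below are invented for illustration and are not WMCore File objects:

files = [
    {"lfn": "/store/a.root", "locations": {"T2_CH_CERN"}, "runs": [317696, 317697]},
    {"lfn": "/store/b.root", "locations": {"T2_CH_CERN"}, "runs": [317696]},
]

locationDict = {}
for fileInfo in files:
    locSet = frozenset(fileInfo["locations"])      # a file may sit at several sites
    byRun = locationDict.setdefault(locSet, {})
    for run in fileInfo["runs"]:
        byRun.setdefault(run, []).append(fileInfo["lfn"])

print(locationDict)
# {frozenset({'T2_CH_CERN'}): {317696: ['/store/a.root', '/store/b.root'],
#                              317697: ['/store/a.root']}}
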
Code Example #10
File: EventAwareLumiBased.py Project: dmwm/WMCore
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        jobLimit = int(kwargs.get('job_limit', 0))
        jobTimeLimit = int(kwargs.get('job_time_limit', self.defaultJobTimeLimit))
        totalEvents = int(kwargs.get('total_events', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', False))
        self.collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        deterministicPileup = kwargs.get('deterministicPileup', False)
        allowCreationFailure = kwargs.get('allowCreationFailure', True)

        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))

        eventsPerLumiInDataset = 0

        if avgEventsPerJob <= 0:
            msg = "events_per_job parameter must be positive. Its value is: %d" % avgEventsPerJob
            raise RuntimeError(msg)

        if self.package == 'WMCore.WMBS':
            self.loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
            if deterministicPileup:
                getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
                self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
                logging.info('Creating jobs in DeterministicPileup mode for %s',
                             self.subscription.workflowName())

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if self.collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')

                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(self.collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList. "
                msg += "Refusing to create any jobs.\nDetails: %s" % str(ex)
                logging.exception(msg)
                return

        lDict = self.getFilesSortedByLocation(avgEventsPerJob)
        if not lDict:
            logging.info("There are not enough events/files to be splitted. Trying again next cycle")
            return

        locationDict = {}
        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.loadRunLumi:
                fileLumis = self.loadRunLumi.execute(files=lDict[key])
                if not fileLumis:
                    logging.warning("Empty fileLumis dict for workflow %s, subs %s.",
                                    self.subscription.workflowName(), self.subscription['id'])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run=Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)

            locationDict[key] = sorted(newlist, key=operator.itemgetter('lowestRun'))

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        lastRun = None
        lumisInJob = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        self.lumiChecker = LumiChecker(applyLumiCorrection)
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob = False
                # If the estimated job time is higher than the job time limit (condor limit)
                # and the file has only one lumi, then ditch that lumi
                timePerLumi = f['avgEvtsPerLumi'] * timePerEvent
                if timePerLumi > jobTimeLimit and f['lumiCount'] == 1:
                    lumisPerJob = 1
                    stopJob = True
                    if allowCreationFailure:
                        failNextJob = True
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    # Check the average number of events per lumi in this file
                    # Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        # If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        # Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    # Check how many events this job already has compared to the target
                    # and include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(math.floor(float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if (not isGoodLumi(goodRunList, run=run.run, lumi=lumi) or
                                self.lumiChecker.isSplitLumi(run.run, lumi, f)):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                                       lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                                   lumis=[firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi is None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run=lastRun,
                                                                       lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has a single lumi %s, in run %s " % (f['lfn'], lumi, run.run)
                                msg += "with too many events %d and it woud take %d sec to run" \
                                       % (f['events'], timePerLumi)
                            self.lumiChecker.closeJob(self.currentJob)
                            self.newJob(name=self.getJobName(), failedJob=failNextJob, failedReason=msg)
                            if deterministicPileup:
                                skipEvents = (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset
                                self.currentJob.addBaggageParameter("skipPileupEvents", skipEvents)
                            self.currentJob.addResourceEstimates(memory=memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1
                            if jobLimit and totalJobs > jobLimit:
                                msg = "Job limit of {0} jobs exceeded.".format(jobLimit)
                                raise RuntimeError(msg)

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                # Then we were carrying from a previous file
                                # Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)), 1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                               lumis=[firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
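
For context, the splitting parameters read via kwargs.get() at the top of this algorithm() are supplied by the caller when the job factory is invoked for a subscription. A hedged sketch of such an argument dictionary; the key names are taken from the kwargs.get() calls above, while the values and the performance keys are illustrative assumptions:

# Values are illustrative; the performance keys follow the names assumed here
# for getPerformanceParameters().
split_args = {
    'events_per_job': 5000,
    'job_time_limit': 345600,             # e.g. roughly four days, in seconds
    'total_events': 0,                    # 0 means no overall event cap
    'halt_job_on_file_boundaries': False,
    'splitOnRun': True,
    'include_parents': False,
    'runWhitelist': [],
    'applyLumiCorrection': True,
    'deterministicPileup': False,
    'performance': {'timePerEvent': 12,
                    'memoryRequirement': 2300,
                    'sizePerEvent': 400},
}

# Typically passed through the job factory built for a subscription,
# e.g. jobFactory(**split_args), which ends up calling algorithm(**split_args).
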