Code Example #1
    def getWorks(self):
        """
        Inject work into wmbs for idle sites
        """
        self.queue.logger.info("Getting work and feeding WMBS files")

        # need to make sure jobs are created
        resources, jobCounts = freeSlots(minusRunning=True,
                                         allowedStates=['Normal', 'Draining'],
                                         knownCmsSites=cmsSiteNames())

        for site in resources:
            self.queue.logger.info("I need %d jobs on site %s" %
                                   (resources[site], site))

        abortedAndForceCompleteRequests = self.abortedAndForceCompleteWorkflowCache.getData()

        previousWorkList = self.queue.getWork(
            resources,
            jobCounts,
            excludeWorkflows=abortedAndForceCompleteRequests)
        self.queue.logger.info(
            "%s of units of work acquired for file creation" %
            len(previousWorkList))
        return
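
A rough, self-contained sketch of what a freeSlots-style helper might compute is shown below: for each site in an allowed state, free slots are the site's job slots minus its running jobs, restricted to known CMS sites. The free_slots function, the thresholds structure, and all numbers are invented for illustration and are not the actual WMBS implementation.

# Hedged sketch of a freeSlots-style helper: free slots per site = job slots minus
# running jobs, for sites in an allowed state and known to CMS. The thresholds
# structure and every value here are invented for illustration only.
def free_slots(thresholds, minus_running=True, allowed_states=None, known_cms_sites=None):
    resources = {}
    for site, info in thresholds.items():
        if allowed_states and info['state'] not in allowed_states:
            continue
        if known_cms_sites and site not in known_cms_sites:
            continue
        slots = info['slots'] - (info['running'] if minus_running else 0)
        if slots > 0:
            resources[site] = slots
    return resources

thresholds = {'T2_Example_A': {'state': 'Normal', 'slots': 100, 'running': 40},
              'T2_Example_B': {'state': 'Down', 'slots': 50, 'running': 0}}
print(free_slots(thresholds, allowed_states=['Normal', 'Draining'],
                 known_cms_sites=['T2_Example_A', 'T2_Example_B']))
# -> {'T2_Example_A': 60}
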
Code Example #2
    def algorithm(self, parameters):
        """
        Get work from local workqueue to be injected into WMBS/DBSBuffer
        """
        self.queue.logger.info("Getting work and feeding WMBS files...")
        try:
            # need to make sure jobs are created
            resources, jobCounts = freeSlots(
                minusRunning=True,
                allowedStates=['Normal', 'Draining'],
                knownCmsSites=cmsSiteNames())

            for site in resources:
                self.queue.logger.info("I need %d jobs on site %s" %
                                       (resources[site], site))

            abortedAndForceCompleteRequests = self.abortedAndForceCompleteWorkflowCache.getData()

            previousWorkList = self.queue.getWork(
                resources,
                jobCounts,
                excludeWorkflows=abortedAndForceCompleteRequests)
            self.queue.logger.info(
                "Acquired %s units of work for WMBS file creation",
                len(previousWorkList))
        except Exception as ex:
            self.queue.logger.error("Error in wmbs inject loop: %s" % str(ex))
Code Example #3
    def pullWork(self, resources = None, draining_resources = None, continuousReplication = True):
        """
        Pull work from another WorkQueue to be processed

        If resources are passed in, get work for them; otherwise get the
        available resources from WMBS.
        """
        if not self.params['ParentQueueCouchUrl']:
            msg = 'Unable to pull work from parent, ParentQueueCouchUrl not provided'
            self.logger.warning(msg)
            return 0
        if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
            self.logger.info('Backend busy or down: skipping work pull')
            return 0
        if self.params['DrainMode']:
            self.logger.info('Draining queue: skipping work pull')
            return 0

        if not draining_resources:
            draining_resources = {}
        if not resources:
            # find out available resources from wmbs
            from WMCore.WorkQueue.WMBSHelper import freeSlots
            sites = freeSlots(self.params['QueueDepth'], knownCmsSites = cmsSiteNames())
            draining_sites = freeSlots(self.params['QueueDepth'], onlyDrain = True)
            # resources for new work are free wmbs resources minus what we already have queued
            _, resources = self.backend.availableWork(sites)
            draining_resources = draining_sites  # don't subtract already-queued work: a large run-anywhere workflow could wipe out these slots

        if not resources and not draining_resources:
            self.logger.info('Not pulling more work. No free slots.')
            return 0

        left_over = self.parent_queue.getElements('Negotiating', returnIdOnly = True,
                                                  ChildQueueUrl = self.params['QueueURL'])
        if left_over:
            self.logger.info('Not pulling more work. Still replicating %d previous units' % len(left_over))
            return 0

        still_processing = self.backend.getInboxElements('Negotiating', returnIdOnly = True)
        if still_processing:
            self.logger.info('Not pulling more work. Still processing %d previous units' % len(still_processing))
            return 0

        self.logger.info("Pull work for sites %s: " % str(resources))

        work, _ = self.parent_queue.availableWork(resources, self.params['Teams'])
        # get work for draining sites (only get work for existing workflows)
        work.extend(self.parent_queue.availableWork(draining_resources, self.params['Teams'], self.backend.getWorkflows())[0])

        if not work:
            self.logger.info('No available work in parent queue.')
            return 0
        work = self._assignToChildQueue(self.params['QueueURL'], *work)

        # do this whether we have work or not - other events, e.g. cancel, may have happened
        self.backend.pullFromParent(continuous = continuousReplication)
        return len(work)
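
The body of pullWork above is a sequence of guard clauses followed by the actual pull from the parent queue. A minimal stand-alone sketch of that control flow is given below; FakeBackend, FakeParentQueue and the parameter values are hypothetical stand-ins for illustration, not part of the WMCore API.

# Hedged sketch of the pullWork guard-clause flow with hypothetical stand-in objects.
class FakeBackend(object):
    def isAvailable(self):
        return True
    def getInboxElements(self, status, returnIdOnly=False):
        return []                      # nothing still being processed
    def pullFromParent(self, continuous=True):
        pass                           # would trigger replication in the real queue

class FakeParentQueue(object):
    def isAvailable(self):
        return True
    def getElements(self, status, returnIdOnly=False, **filters):
        return []                      # nothing left over from a previous pull
    def availableWork(self, resources, teams, workflows=None):
        # pretend the parent has one unit of work per site with free slots
        return ['unit-%s' % site for site in resources], resources

def pull_work(params, backend, parent, resources):
    # guard clauses mirror pullWork above
    if not params.get('ParentQueueCouchUrl'):
        return 0
    if not backend.isAvailable() or not parent.isAvailable():
        return 0
    if params.get('DrainMode'):
        return 0
    if not resources:
        return 0
    if parent.getElements('Negotiating', returnIdOnly=True,
                          ChildQueueUrl=params['QueueURL']):
        return 0
    if backend.getInboxElements('Negotiating', returnIdOnly=True):
        return 0
    work, _ = parent.availableWork(resources, params.get('Teams'))
    backend.pullFromParent(continuous=True)
    return len(work)

params = {'ParentQueueCouchUrl': 'http://parent.example/couch',
          'QueueURL': 'http://child.example/couch',
          'DrainMode': False,
          'Teams': ['team-a']}
print(pull_work(params, FakeBackend(), FakeParentQueue(), {'T1_Example': 5}))  # -> 1
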
Code Example #4
    def getWorks(self):
        """
        Inject work into wmbs for idle sites
        """
        self.queue.logger.info("Getting work and feeding WMBS files")

        # need to make sure jobs are created
        resources, jobCounts = freeSlots(minusRunning = True,
                                         allowedStates = ['Normal', 'Draining'],
                                         knownCmsSites = cmsSiteNames())

        for site in resources:
            self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

        self.previousWorkList = self.queue.getWork(resources, jobCounts)
        self.queue.logger.info("%s of units of work acquired for file creation"
                               % len(self.previousWorkList))
        return
Code Example #5
    def getWorks(self):
        """
        Inject work into wmbs for idle sites
        """
        self.queue.logger.info("Getting work and feeding WMBS files")

        # need to make sure jobs are created
        resources = freeSlots(minusRunning=True,
                              allowedStates=['Normal', 'Draining'],
                              knownCmsSites=cmsSiteNames())

        for site in resources:
            self.queue.logger.info("I need %d jobs on site %s" %
                                   (resources[site], site))

        self.previousWorkList = self.queue.getWork(resources)
        self.queue.logger.info(
            "%s of units of work acquired for file creation" %
            len(self.previousWorkList))
        return
Code Example #6
    def algorithm(self, parameters):
        """
        Get work from local workqueue to be injected into WMBS/DBSBuffer
        """
        self.queue.logger.info("Getting work and feeding WMBS files...")
        try:
            # need to make sure jobs are created
            resources, jobCounts = freeSlots(minusRunning=True, allowedStates=['Normal', 'Draining'],
                                             knownCmsSites=cmsSiteNames())

            for site in resources:
                self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

            abortedAndForceCompleteRequests = self.abortedAndForceCompleteWorkflowCache.getData()

            previousWorkList = self.queue.getWork(resources, jobCounts,
                                                  excludeWorkflows=abortedAndForceCompleteRequests)
            self.queue.logger.info("Acquired %s units of work for WMBS file creation", len(previousWorkList))
        except Exception as ex:
            self.queue.logger.error("Error in wmbs inject loop: %s" % str(ex))
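
excludeWorkflows is handled inside getWork; conceptually it means that work units belonging to aborted or force-completed requests are skipped when acquiring work. A minimal illustration of that filtering idea follows; the element structure and names are hypothetical, not the WorkQueue element schema.

# Hedged illustration of the excludeWorkflows idea: drop work units whose workflow
# is in the aborted/force-complete set. The unit dictionaries here are made up.
def filter_excluded(work_units, excluded_workflows):
    excluded = set(excluded_workflows or [])
    return [unit for unit in work_units if unit['RequestName'] not in excluded]

units = [{'RequestName': 'req_A', 'Jobs': 10},
         {'RequestName': 'req_B', 'Jobs': 5}]
print(filter_excluded(units, ['req_B']))  # -> [{'RequestName': 'req_A', 'Jobs': 10}]
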
Code Example #7
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask(): #if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.inputLocationFlag():
            # Then get the locations from the site whitelist/blacklist + SiteDB
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            if siteWhitelist:
                # Just get the SEs matching the whitelist
                self.sites = siteWhitelist
            elif siteBlacklist:
                # Get all CMS sites less the blacklist
                allSites = cmsSiteNames()
                self.sites = list(set(allSites) - set(siteBlacklist))
            else:
                # Run at any CMS site
                self.sites = cmsSiteNames()

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath : []} # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data) # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data) # check dataset name
                for block in dbs.listFileBlocks(data):
                    blocks.append(str(block))


        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                continue

            #check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum(len(lumis) for lumis in maskedBlocks[blockName].values())
                # use the information from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total number of lumis
                ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block = block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName = block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents']/len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.inputLocationFlag():
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

            validBlocks.append(block)
        return validBlocks
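
When a run white/blacklist changes the run set, the version above re-derives the block's file, lumi and event counts from per-file DBS information. A self-contained sketch of that recalculation is shown below; the fileInfo structure mimics entries like those returned by dbs.listFilesInBlock, but the function name and all values are made up for illustration.

# Hedged sketch of the per-file recalculation: given DBS-style file entries and the
# set of accepted runs, recount files and lumis and pro-rate the event count.
def recalculate_block(file_info, accepted_runs):
    lumi_count = event_count = file_count = 0
    for entry in file_info:
        accepted_lumis = sum(1 for lumi in entry['LumiList']
                             if lumi['RunNumber'] in accepted_runs)
        if not accepted_lumis:
            continue
        file_count += 1
        lumi_count += accepted_lumis
        # pro-rate events by the fraction of accepted lumi sections in the file
        event_count += entry['NumberOfEvents'] * accepted_lumis / float(len(entry['LumiList']))
    return {'NumberOfFiles': file_count,
            'NumberOfLumis': lumi_count,
            'NumberOfEvents': event_count}

file_info = [{'NumberOfEvents': 1000,
              'LumiList': [{'RunNumber': 1, 'LumiSectionNumber': 1},
                           {'RunNumber': 2, 'LumiSectionNumber': 2}]},
             {'NumberOfEvents': 500,
              'LumiList': [{'RunNumber': 3, 'LumiSectionNumber': 3}]}]
print(recalculate_block(file_info, accepted_runs={1}))
# -> {'NumberOfFiles': 1, 'NumberOfLumis': 1, 'NumberOfEvents': 500.0}
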
Code Example #8
    def pullWork(self,
                 resources=None,
                 draining_resources=None,
                 continuousReplication=True):
        """
        Pull work from another WorkQueue to be processed

        If resources are passed in, get work for them; otherwise get the
        available resources from WMBS.
        """
        if not self.params['ParentQueueCouchUrl']:
            msg = 'Unable to pull work from parent, ParentQueueCouchUrl not provided'
            self.logger.warning(msg)
            return 0
        if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
            self.logger.info('Backend busy or down: skipping work pull')
            return 0
        if self.params['DrainMode']:
            self.logger.info('Draining queue: skipping work pull')
            return 0

        if not draining_resources:
            draining_resources = {}
        if not resources:
            # find out available resources from wmbs
            from WMCore.WorkQueue.WMBSHelper import freeSlots
            sites = freeSlots(self.params['QueueDepth'],
                              knownCmsSites=cmsSiteNames())
            draining_sites = freeSlots(self.params['QueueDepth'],
                                       allowedStates=['Draining'])
            # resources for new work are free wmbs resources minus what we already have queued
            _, resources = self.backend.availableWork(sites)
            draining_resources = draining_sites  # don't subtract already-queued work: a large run-anywhere workflow could wipe out these slots

        if not resources and not draining_resources:
            self.logger.info('Not pulling more work. No free slots.')
            return 0

        left_over = self.parent_queue.getElements(
            'Negotiating',
            returnIdOnly=True,
            ChildQueueUrl=self.params['QueueURL'])
        if left_over:
            self.logger.info(
                'Not pulling more work. Still replicating %d previous units' %
                len(left_over))
            return 0

        still_processing = self.backend.getInboxElements('Negotiating',
                                                         returnIdOnly=True)
        if still_processing:
            self.logger.info(
                'Not pulling more work. Still processing %d previous units' %
                len(still_processing))
            return 0

        self.logger.info("Pull work for sites %s: " % str(resources))

        work, _ = self.parent_queue.availableWork(resources,
                                                  self.params['Teams'])
        # get work for draining sites (only get work for existing workflows)
        work.extend(
            self.parent_queue.availableWork(draining_resources,
                                            self.params['Teams'],
                                            self.backend.getWorkflows())[0])

        if not work:
            self.logger.info('No available work in parent queue.')
            return 0
        work = self._assignToChildQueue(self.params['QueueURL'], *work)

        # do this whether we have work or not - other events, e.g. cancel, may have happened
        self.backend.pullFromParent(continuous=continuousReplication)
        return len(work)
Code Example #9
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask(): #if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.inputLocationFlag():
            # Then get the locations from the site whitelist/blacklist + SiteDB
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            if siteWhitelist:
                # Just get the SEs matching the whitelist
                self.sites = siteWhitelist
            elif siteBlacklist:
                # Get all CMS sites less the blacklist
                allSites = cmsSiteNames()
                self.sites = list(set(allSites) - set(siteBlacklist))
            else:
                # Run at any CMS site
                self.sites = cmsSiteNames()

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath : []} # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data) # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data) # check dataset name
                for block in dbs.listFileBlocks(data):
                    blocks.append(str(block))


        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                continue

            #check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum(len(lumis) for lumis in maskedBlocks[blockName].values())
                # use the information from getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio = lumis which are ok in the block / total number of lumis
                ratio_accepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRuns returns a run number per lumi section
                full_lumi_list = dbs.listRuns(block = block['block'])
                runs = set(full_lumi_list)

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)

                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)
                accepted_lumis = [x for x in full_lumi_list if x in runs]
                ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
                block[self.lumiType] = len(accepted_lumis)
                block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
                block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

            # save locations
            if task.inputLocationFlag():
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

            validBlocks.append(block)
        return validBlocks
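
This variant avoids pulling per-file information and instead scales the block's file and event counts by the fraction of lumi sections whose run survives the run filters. A small sketch of that ratio-based estimate follows; the function name, block numbers and lumi list are invented for illustration.

# Hedged sketch of the ratio-based estimate: scale file and event counts by the
# fraction of lumi sections whose run passes the run white/blacklist filters.
def estimate_block(block, run_per_lumi, accepted_runs):
    accepted = [run for run in run_per_lumi if run in accepted_runs]
    ratio = float(len(accepted)) / len(run_per_lumi)
    return {'NumberOfLumis': len(accepted),
            'NumberOfFiles': block['NumberOfFiles'] * ratio,
            'NumberOfEvents': block['NumberOfEvents'] * ratio}

block = {'NumberOfFiles': 10, 'NumberOfEvents': 20000}
run_per_lumi = [1, 1, 2, 2]  # one run number per lumi section, as from listRuns
print(estimate_block(block, run_per_lumi, accepted_runs={1}))
# -> {'NumberOfLumis': 2, 'NumberOfFiles': 5.0, 'NumberOfEvents': 10000.0}
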