def getWorks(self):
    """
    Inject work into wmbs for idle sites
    """
    self.queue.logger.info("Getting work and feeding WMBS files")

    # need to make sure jobs are created
    resources, jobCounts = freeSlots(minusRunning=True, allowedStates=['Normal', 'Draining'],
                                     knownCmsSites=cmsSiteNames())

    for site in resources:
        self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

    abortedAndForceCompleteRequests = self.abortedAndForceCompleteWorkflowCache.getData()

    previousWorkList = self.queue.getWork(resources, jobCounts,
                                          excludeWorkflows=abortedAndForceCompleteRequests)
    self.queue.logger.info("%s units of work acquired for file creation" % len(previousWorkList))

    return
def algorithm(self, parameters):
    """
    Get work from local workqueue to be injected into WMBS/DBSBuffer
    """
    self.queue.logger.info("Getting work and feeding WMBS files...")
    try:
        # need to make sure jobs are created
        resources, jobCounts = freeSlots(minusRunning=True, allowedStates=['Normal', 'Draining'],
                                         knownCmsSites=cmsSiteNames())

        for site in resources:
            self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

        abortedAndForceCompleteRequests = self.abortedAndForceCompleteWorkflowCache.getData()

        previousWorkList = self.queue.getWork(resources, jobCounts,
                                              excludeWorkflows=abortedAndForceCompleteRequests)
        self.queue.logger.info("Acquired %s units of work for WMBS file creation",
                               len(previousWorkList))
    except Exception as ex:
        self.queue.logger.error("Error in wmbs inject loop: %s" % str(ex))
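# Illustrative sketch (not part of WMCore): how an excludeWorkflows list keeps
# aborted or force-completed requests from being injected into WMBS. The element
# structure and helper name below are hypothetical; the real filtering happens
# inside WorkQueue.getWork().
def _filterExcludedWorkflows(elements, excludeWorkflows):
    """Drop candidate work elements whose request name is in the exclusion list."""
    excluded = set(excludeWorkflows or [])
    return [ele for ele in elements if ele['RequestName'] not in excluded]


if __name__ == '__main__':
    candidates = [{'RequestName': 'reqA', 'Jobs': 100},
                  {'RequestName': 'reqB', 'Jobs': 50}]
    # 'reqB' was aborted/force-completed, so it must not be turned into WMBS files
    print(_filterExcludedWorkflows(candidates, ['reqB']))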
def pullWork(self, resources=None, draining_resources=None, continuousReplication=True):
    """
    Pull work from another WorkQueue to be processed

    If resources are passed in, get work for them; otherwise look up the
    available resources from wmbs.
    """
    if not self.params['ParentQueueCouchUrl']:
        msg = 'Unable to pull work from parent, ParentQueueCouchUrl not provided'
        self.logger.warning(msg)
        return 0
    if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
        self.logger.info('Backend busy or down: skipping work pull')
        return 0
    if self.params['DrainMode']:
        self.logger.info('Draining queue: skipping work pull')
        return 0

    if not draining_resources:
        draining_resources = {}

    if not resources:
        # find out available resources from wmbs
        from WMCore.WorkQueue.WMBSHelper import freeSlots
        sites = freeSlots(self.params['QueueDepth'], knownCmsSites=cmsSiteNames())
        draining_sites = freeSlots(self.params['QueueDepth'], onlyDrain=True)
        # resources for new work are free wmbs resources minus what we already have queued
        _, resources = self.backend.availableWork(sites)
        draining_resources = draining_sites  # don't minus available as large run-anywhere could decimate

        if not resources and not draining_resources:
            self.logger.info('Not pulling more work. No free slots.')
            return 0

    left_over = self.parent_queue.getElements('Negotiating', returnIdOnly=True,
                                              ChildQueueUrl=self.params['QueueURL'])
    if left_over:
        self.logger.info('Not pulling more work. Still replicating %d previous units' % len(left_over))
        return 0

    still_processing = self.backend.getInboxElements('Negotiating', returnIdOnly=True)
    if still_processing:
        self.logger.info('Not pulling more work. Still processing %d previous units' % len(still_processing))
        return 0

    self.logger.info("Pull work for sites %s: " % str(resources))

    work, _ = self.parent_queue.availableWork(resources, self.params['Teams'])
    # get work for draining sites (only get work for existing workflows)
    work.extend(self.parent_queue.availableWork(draining_resources, self.params['Teams'],
                                                self.backend.getWorkflows())[0])

    if not work:
        self.logger.info('No available work in parent queue.')
        return 0

    work = self._assignToChildQueue(self.params['QueueURL'], *work)

    # do this whether we have work or not - other events i.e. cancel may have happened
    self.backend.pullFromParent(continuous=continuousReplication)
    return len(work)
def getWorks(self):
    """
    Inject work into wmbs for idle sites
    """
    self.queue.logger.info("Getting work and feeding WMBS files")

    # need to make sure jobs are created
    resources, jobCounts = freeSlots(minusRunning=True, allowedStates=['Normal', 'Draining'],
                                     knownCmsSites=cmsSiteNames())

    for site in resources:
        self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

    self.previousWorkList = self.queue.getWork(resources, jobCounts)
    self.queue.logger.info("%s units of work acquired for file creation" % len(self.previousWorkList))

    return
def getWorks(self):
    """
    Inject work into wmbs for idle sites
    """
    self.queue.logger.info("Getting work and feeding WMBS files")

    # need to make sure jobs are created
    resources = freeSlots(minusRunning=True, allowedStates=['Normal', 'Draining'],
                          knownCmsSites=cmsSiteNames())

    for site in resources:
        self.queue.logger.info("I need %d jobs on site %s" % (resources[site], site))

    self.previousWorkList = self.queue.getWork(resources)
    self.queue.logger.info("%s units of work acquired for file creation" % len(self.previousWorkList))

    return
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction"""
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        if siteWhitelist:
            # Just get the ses matching the whitelists
            self.sites = siteWhitelist
        elif siteBlacklist:
            # Get all CMS sites less the blacklist
            allSites = cmsSiteNames()
            self.sites = list(set(allSites) - set(siteBlacklist))
        else:
            # Run at any CMS site
            self.sites = cmsSiteNames()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # then we must calculate the lumi counts after filtering the run list.
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True

            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip recalculating lumi counts
                recalculateLumiCounts = False

            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        runNumber = lumiInfo['RunNumber']
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            acceptedEventCount += float(acceptedFileLumiCount) * \
                                fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']

                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

        validBlocks.append(block)

    return validBlocks
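# Illustrative sketch (not part of WMCore): the recalculateLumiCounts branch above,
# replayed on hand-made DBS-style file records. All data is made up; only the
# arithmetic mirrors the code: a file is accepted if any of its lumis belongs to a
# surviving run, and its events are scaled by the fraction of accepted lumis.
def _recountBlock(fileInfo, runs):
    acceptedFileCount = acceptedLumiCount = 0
    acceptedEventCount = 0.0
    for fileEntry in fileInfo:
        acceptedFileLumiCount = sum(1 for lumi in fileEntry['LumiList']
                                    if lumi['RunNumber'] in runs)
        if acceptedFileLumiCount:
            acceptedFileCount += 1
            acceptedLumiCount += acceptedFileLumiCount
            acceptedEventCount += (float(acceptedFileLumiCount) * fileEntry['NumberOfEvents']
                                   / len(fileEntry['LumiList']))
    return acceptedFileCount, acceptedLumiCount, acceptedEventCount


if __name__ == '__main__':
    files = [{'NumberOfEvents': 1000,
              'LumiList': [{'RunNumber': 1}, {'RunNumber': 1}, {'RunNumber': 2}]},
             {'NumberOfEvents': 400,
              'LumiList': [{'RunNumber': 2}]}]
    # keeping only run 1: 1 file, 2 lumis, ~666.7 events
    print(_recountBlock(files, runs={1}))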
def pullWork(self, resources=None, draining_resources=None, continuousReplication=True):
    """
    Pull work from another WorkQueue to be processed

    If resources are passed in, get work for them; otherwise look up the
    available resources from wmbs.
    """
    if not self.params['ParentQueueCouchUrl']:
        msg = 'Unable to pull work from parent, ParentQueueCouchUrl not provided'
        self.logger.warning(msg)
        return 0
    if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
        self.logger.info('Backend busy or down: skipping work pull')
        return 0
    if self.params['DrainMode']:
        self.logger.info('Draining queue: skipping work pull')
        return 0

    if not draining_resources:
        draining_resources = {}

    if not resources:
        # find out available resources from wmbs
        from WMCore.WorkQueue.WMBSHelper import freeSlots
        sites = freeSlots(self.params['QueueDepth'], knownCmsSites=cmsSiteNames())
        draining_sites = freeSlots(self.params['QueueDepth'], allowedStates=['Draining'])
        # resources for new work are free wmbs resources minus what we already have queued
        _, resources = self.backend.availableWork(sites)
        draining_resources = draining_sites  # don't minus available as large run-anywhere could decimate

        if not resources and not draining_resources:
            self.logger.info('Not pulling more work. No free slots.')
            return 0

    left_over = self.parent_queue.getElements('Negotiating', returnIdOnly=True,
                                              ChildQueueUrl=self.params['QueueURL'])
    if left_over:
        self.logger.info('Not pulling more work. Still replicating %d previous units' % len(left_over))
        return 0

    still_processing = self.backend.getInboxElements('Negotiating', returnIdOnly=True)
    if still_processing:
        self.logger.info('Not pulling more work. Still processing %d previous units' % len(still_processing))
        return 0

    self.logger.info("Pull work for sites %s: " % str(resources))

    work, _ = self.parent_queue.availableWork(resources, self.params['Teams'])
    # get work for draining sites (only get work for existing workflows)
    work.extend(self.parent_queue.availableWork(draining_resources, self.params['Teams'],
                                                self.backend.getWorkflows())[0])

    if not work:
        self.logger.info('No available work in parent queue.')
        return 0

    work = self._assignToChildQueue(self.params['QueueURL'], *work)

    # do this whether we have work or not - other events i.e. cancel may have happened
    self.backend.pullFromParent(continuous=continuousReplication)
    return len(work)
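# Illustrative sketch (not part of WMCore): the "free wmbs slots minus what we
# already have queued" idea from the comment in pullWork, on mock dictionaries.
# In the real code that subtraction is done inside backend.availableWork(); the
# helper and site names here are made up.
def _slotsForNewWork(freeSlotsBySite, queuedBySite):
    remaining = {}
    for site, free in freeSlotsBySite.items():
        left = free - queuedBySite.get(site, 0)
        if left > 0:
            remaining[site] = left
    return remaining


if __name__ == '__main__':
    free = {'T1_US_FNAL': 1000, 'T2_DE_DESY': 200}
    queued = {'T1_US_FNAL': 800, 'T2_DE_DESY': 300}
    # only T1_US_FNAL still has room for new work (200 slots)
    print(_slotsForNewWork(free, queued))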
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction"""
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        if siteWhitelist:
            # Just get the ses matching the whitelists
            self.sites = siteWhitelist
        elif siteBlacklist:
            # Get all CMS sites less the blacklist
            allSites = cmsSiteNames()
            self.sites = list(set(allSites) - set(siteBlacklist))
        else:
            # Run at any CMS site
            self.sites = cmsSiteNames()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratio_accepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            full_lumi_list = dbs.listRuns(block=block['block'])
            runs = set(full_lumi_list)
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            accepted_lumis = [x for x in full_lumi_list if x in runs]
            ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
            block[self.lumiType] = len(accepted_lumis)
            block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

        validBlocks.append(block)

    return validBlocks
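# Illustrative sketch (not part of WMCore): the ratio-based estimate used above when
# only a run white/blacklist applies. Numbers are made up; 'NumberOfLumis' stands in
# for the block[self.lumiType] key. Files and events are simply scaled by the fraction
# of lumi sections whose run survives the filter, instead of re-reading file info.
if __name__ == '__main__':
    full_lumi_list = [1, 1, 1, 2, 2, 3]    # run number per lumi section, as from listRuns
    runs = {1, 3}                          # runs left after white/blacklist filtering
    accepted_lumis = [r for r in full_lumi_list if r in runs]
    ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)   # 4/6
    block = {'NumberOfFiles': 30, 'NumberOfEvents': 60000, 'NumberOfLumis': 6}
    block['NumberOfLumis'] = len(accepted_lumis)                      # 4
    block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted    # 20.0
    block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted  # 40000.0
    print(block)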