def validBlocks(self, task):
    """Return blocks that pass the input data restriction.

    Two modes depending on ``self.data``:
      * if it already names an ACDC block, fetch that single chunk's
        summary from the ACDC data-collection service;
      * otherwise split the whole ACDC fileset into fixed-size chunks.
    Each returned entry is a dict shaped like a DBS block summary
    (Name / NumberOfFiles / NumberOfEvents / NumberOfLumis / Sites / ACDC).
    """
    validBlocks = []
    # TODO take the chunk size from parameter
    chunkSize = 200

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        # ACDC-based specs must carry an acdc section; fail loudly otherwise
        raise WorkQueueWMSpecError(
            self.wmspec, 'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
    else:
        # if self.data is not passed, assume the data is the input dataset
        # from the spec
        acdcBlockSplit = False

    if acdcBlockSplit:
        dbsBlock = {}
        dbsBlock['Name'] = self.data.keys()[0]
        block = acdc.getChunkInfo(
            acdcInfo['collection'],
            acdcBlockSplit['TaskName'],
            acdcBlockSplit['Offset'],
            acdcBlockSplit['NumOfFiles'],
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        dbsBlock['ACDC'] = acdcInfo
        # NOTE: the helper name is (mis)spelled this way in the module
        dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        validBlocks.append(dbsBlock)
    else:
        acdcBlocks = acdc.chunkFileset(
            acdcInfo['collection'],
            acdcInfo['fileset'],
            chunkSize,
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        for block in acdcBlocks:
            dbsBlock = {}
            # synthesize a block name encoding workflow/fileset/offset/size
            dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                              acdcInfo["fileset"],
                                              block['offset'],
                                              block['files'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            dbsBlock['ACDC'] = acdcInfo
            validBlocks.append(dbsBlock)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Walks every block of the task's input dataset, applies block and run
    white/black lists, estimates the effective block size after run
    filtering, and records (as a side effect) the set of sites that host
    ALL accepted blocks in ``self.data[datasetPath]``.
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of block locations

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            full_lumi_list = dbs.listRuns(block = block['block'])
            runs = set(full_lumi_list)
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            accepted_lumis = [x for x in full_lumi_list if x in runs]
            ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
            block[self.lumiType] = len(accepted_lumis)
            block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

        validBlocks.append(block)
        # track sites common to every accepted block so far
        if locations is None:
            locations = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        else:
            locations = locations.intersection(set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))))

    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task):
    """Return blocks that pass the input data restriction"""
    # TODO take the chunk size from parameter
    chunkSize = 200

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        msg = 'No acdc section for %s' % task.getPathName()
        raise WorkQueueWMSpecError(self.wmspec, msg)

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    # if self.data is not passed, assume the data is the input dataset
    # from the spec
    acdcBlockSplit = False
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])

    validBlocks = []
    if acdcBlockSplit:
        # A single named ACDC block was requested: look up its chunk info.
        chunk = acdc.getChunkInfo(acdcInfo['collection'],
                                  acdcBlockSplit['TaskName'],
                                  acdcBlockSplit['Offset'],
                                  acdcBlockSplit['NumOfFiles'],
                                  user = self.wmspec.getOwner().get("name"),
                                  group = self.wmspec.getOwner().get("group"))
        validBlocks.append({'Name': self.data.keys()[0],
                            'NumberOfFiles': chunk['files'],
                            'NumberOfEvents': chunk['events'],
                            'NumberOfLumis': chunk['lumis'],
                            'ACDC': acdcInfo,
                            'Sites': sitesFromStorageEelements(chunk["locations"])})
    else:
        # No explicit block: chunk the whole fileset.
        chunks = acdc.chunkFileset(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   chunkSize,
                                   user = self.wmspec.getOwner().get("name"),
                                   group = self.wmspec.getOwner().get("group"))
        for chunk in chunks:
            blockName = ACDCBlock.name(self.wmspec.name(),
                                       acdcInfo["fileset"],
                                       chunk['offset'],
                                       chunk['files'])
            validBlocks.append({'Name': blockName,
                                'NumberOfFiles': chunk['files'],
                                'NumberOfEvents': chunk['events'],
                                'NumberOfLumis': chunk['lumis'],
                                'Sites': sitesFromStorageEelements(chunk["locations"]),
                                'ACDC': acdcInfo})
    return validBlocks
def singleChunk(self, acdc, acdcInfo, task):
    """Return a single block (inside a list) with all associated ACDC records"""
    acdcBlock = acdc.singleChunkFileset(
        acdcInfo['collection'],
        acdcInfo['fileset'],
        user=self.wmspec.getOwner().get("name"),
        group=self.wmspec.getOwner().get("group"))

    # Sites come from the task's location flag or from the ACDC record
    if task.inputLocationFlag():
        sites = self.sites
    else:
        sites = sitesFromStorageEelements(acdcBlock["locations"])

    dbsBlock = {'Name': ACDCBlock.name(self.wmspec.name(),
                                       acdcInfo["fileset"],
                                       acdcBlock['offset'],
                                       acdcBlock['files']),
                'NumberOfFiles': acdcBlock['files'],
                'NumberOfEvents': acdcBlock['events'],
                'NumberOfLumis': acdcBlock['lumis'],
                'Sites': sites,
                'ACDC': acdcInfo}

    # An empty chunk carries no work: return an empty list
    return [dbsBlock] if dbsBlock['NumberOfFiles'] else []
def split(self):
    """Apply policy to spec.

    For every valid input block, create one work-queue element carrying
    the block's location, size estimates, parentage information and the
    number of jobs derived from SliceType/SliceSize.
    """
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        #set the parent flag for processing only for clarity on the couch doc
        parentList = {}
        parentFlag = False
        #TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for dbsBlock in dbs.listBlockParents(block["block"]):
                if self.initialTask.inputLocationFlag():
                    # honour the spec-pinned site list rather than DBS
                    parentList[dbsBlock["Name"]] = self.sites
                else:
                    parentList[dbsBlock["Name"]] = sitesFromStorageEelements(dbsBlock['StorageElementList'])

        self.newQueueElement(Inputs = {block['block'] : self.data.get(block['block'], [])},
                             ParentFlag = parentFlag,
                             ParentData = parentList,
                             NumberOfLumis = int(block[self.lumiType]),
                             NumberOfFiles = int(block['NumberOfFiles']),
                             NumberOfEvents = int(block['NumberOfEvents']),
                             # round up so partial slices still get a job
                             Jobs = ceil(float(block[self.args['SliceType']]) /
                                         float(self.args['SliceSize'])),
                             # DBS reports OpenForWriting as '1'/'0' strings
                             OpenForNewData = True if str(block.get('OpenForWriting')) == '1' else False,
                             NoLocationUpdate = self.initialTask.inputLocationFlag()
                             )
def split(self):
    """Apply policy to spec"""
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        # set the parent flag for processing only for clarity on the couch doc
        parentFlag = False
        parentList = {}
        # TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for parentBlock in dbs.listBlockParents(block["block"]):
                if self.initialTask.inputLocationFlag():
                    parentList[parentBlock["Name"]] = self.sites
                else:
                    parentList[parentBlock["Name"]] = sitesFromStorageEelements(parentBlock['StorageElementList'])

        sliceCount = float(block[self.args['SliceType']])
        sliceSize = float(self.args['SliceSize'])
        blockIsOpen = str(block.get('OpenForWriting')) == '1'

        self.newQueueElement(
            Inputs={block['block']: self.data.get(block['block'], [])},
            ParentFlag=parentFlag,
            ParentData=parentList,
            NumberOfLumis=int(block[self.lumiType]),
            NumberOfFiles=int(block['NumberOfFiles']),
            NumberOfEvents=int(block['NumberOfEvents']),
            Jobs=ceil(sliceCount / sliceSize),
            OpenForNewData=blockIsOpen,
            NoLocationUpdate=self.initialTask.inputLocationFlag())
def fixedSizeChunk(self, acdc, acdcInfo, task):
    """Return a set of blocks with a fixed number of ACDC records"""
    chunkSize = 250
    fixedSizeBlocks = []

    chunks = acdc.chunkFileset(acdcInfo['collection'],
                               acdcInfo['fileset'],
                               chunkSize,
                               user = self.wmspec.getOwner().get("name"),
                               group = self.wmspec.getOwner().get("group"))
    for chunk in chunks:
        # site list either pinned by the spec or taken from the ACDC record
        if task.inputLocationFlag():
            sites = self.sites
        else:
            sites = sitesFromStorageEelements(chunk["locations"])

        dbsBlock = {'Name': ACDCBlock.name(self.wmspec.name(),
                                           acdcInfo["fileset"],
                                           chunk['offset'],
                                           chunk['files']),
                    'NumberOfFiles': chunk['files'],
                    'NumberOfEvents': chunk['events'],
                    'NumberOfLumis': chunk['lumis'],
                    'Sites': sites,
                    'ACDC': acdcInfo}
        # only keep chunks that actually contain files
        if dbsBlock['NumberOfFiles']:
            fixedSizeBlocks.append(dbsBlock)
    return fixedSizeBlocks
def fixedSizeChunk(self, acdc, acdcInfo, task):
    """Return a set of blocks with a fixed number of ACDC records.

    Splits the ACDC fileset into chunks of at most ``chunkSize`` records
    and wraps each non-empty chunk into a DBS-block-like dict.
    """
    fixedSizeBlocks = []
    chunkSize = 250  # fixed number of ACDC records per chunk
    acdcBlocks = acdc.chunkFileset(
        acdcInfo['collection'],
        acdcInfo['fileset'],
        chunkSize,
        user=self.wmspec.getOwner().get("name"),
        group=self.wmspec.getOwner().get("group"))
    for block in acdcBlocks:
        dbsBlock = {}
        # synthesize a block name encoding workflow/fileset/offset/size
        dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                          acdcInfo["fileset"],
                                          block['offset'],
                                          block['files'])
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        if task.inputLocationFlag():
            # spec pins the site list; ignore ACDC locations
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        dbsBlock['ACDC'] = acdcInfo
        # skip empty chunks - they carry no work
        if dbsBlock['NumberOfFiles']:
            fixedSizeBlocks.append(dbsBlock)
    return fixedSizeBlocks
def getDatasetLocations(self, datasets):
    """Returns a dictionary with the location of the datasets according to DBS"""
    result = {}
    # datasets maps a DBS url to the dataset paths registered in it
    for dbsUrl, datasetPaths in datasets.items():
        dbs = self.dbs(dbsUrl)
        for datasetPath in datasetPaths:
            result[datasetPath] = sitesFromStorageEelements(
                dbs.listDatasetLocation(datasetPath))
    return result
def getDatasetLocations(self, datasets):
    """Returns a dictionary with the location of the datasets according to DBS.

    ``datasets`` maps a DBS url to an iterable of dataset paths; the result
    maps each dataset path to its list of hosting sites.
    """
    result = {}
    for dbsUrl in datasets:
        # one DBS reader per url
        dbs = self.dbs(dbsUrl)
        for datasetPath in datasets[dbsUrl]:
            locations = sitesFromStorageEelements(
                dbs.listDatasetLocation(datasetPath))
            result[datasetPath] = locations
    return result
def validBlocks(self, task):
    """Return blocks that pass the input data restriction according
    to the splitting algorithm.

    If ``self.data`` already names an ACDC block, return just that chunk;
    otherwise dispatch to the chunking function mapped from the spec's
    SplittingAlgo (falling back to ``self.defaultAlgo``).
    """
    validBlocks = []

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        raise WorkQueueWMSpecError(
            self.wmspec, 'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
    else:
        # if self.data is not passed, assume the data is the input dataset
        # from the spec
        acdcBlockSplit = False

    if acdcBlockSplit:
        dbsBlock = {}
        dbsBlock['Name'] = self.data.keys()[0]
        block = acdc.getChunkInfo(
            acdcInfo['collection'],
            acdcBlockSplit['TaskName'],
            acdcBlockSplit['Offset'],
            acdcBlockSplit['NumOfFiles'],
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        dbsBlock['ACDC'] = acdcInfo
        if task.inputLocationFlag():
            # spec pins the site list; ignore ACDC locations
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        validBlocks.append(dbsBlock)
    else:
        if self.args['SplittingAlgo'] in self.unsupportedAlgos:
            raise WorkQueueWMSpecError(
                self.wmspec,
                'ACDC is not supported for %s' % self.args['SplittingAlgo'])
        # pick the chunking strategy for this splitting algorithm
        splittingFunc = self.defaultAlgo
        if self.args['SplittingAlgo'] in self.algoMapping:
            splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
        validBlocks = splittingFunc(acdc, acdcInfo, task)

    return validBlocks
def split(self):
    """Apply policy to spec"""
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        # set the parent flag for processing only for clarity on the couch doc
        parentFlag = False
        parentList = {}
        # TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for parentBlock in dbs.listBlockParents(block["block"]):
                parentList[parentBlock["Name"]] = sitesFromStorageEelements(parentBlock['StorageElementList'])

        jobs = ceil(float(block[self.args['SliceType']]) /
                    float(self.args['SliceSize']))
        self.newQueueElement(
            Inputs={block['block']: self.data.get(block['block'], [])},
            ParentFlag=parentFlag,
            ParentData=parentList,
            Jobs=jobs)
def validBlocks(self, task):
    """Return blocks that pass the input data restriction according
    to the splitting algorithm"""
    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        raise WorkQueueWMSpecError(self.wmspec,
                                   'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    # if self.data is not passed, assume the data is the input dataset
    # from the spec
    acdcBlockSplit = False
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])

    if acdcBlockSplit:
        # A single named ACDC block: fetch just that chunk.
        chunk = acdc.getChunkInfo(acdcInfo['collection'],
                                  acdcBlockSplit['TaskName'],
                                  acdcBlockSplit['Offset'],
                                  acdcBlockSplit['NumOfFiles'],
                                  user = self.wmspec.getOwner().get("name"),
                                  group = self.wmspec.getOwner().get("group"))
        dbsBlock = {'Name': self.data.keys()[0],
                    'NumberOfFiles': chunk['files'],
                    'NumberOfEvents': chunk['events'],
                    'NumberOfLumis': chunk['lumis'],
                    'ACDC': acdcInfo}
        if task.inputLocationFlag():
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(chunk["locations"])
        return [dbsBlock]

    if self.args['SplittingAlgo'] in self.unsupportedAlgos:
        raise WorkQueueWMSpecError(self.wmspec,
                                   'ACDC is not supported for %s' % self.args['SplittingAlgo'])

    # dispatch to the chunker registered for this algorithm, if any
    splittingFunc = self.algoMapping.get(self.args['SplittingAlgo'],
                                         self.defaultAlgo)
    return splittingFunc(acdc, acdcInfo, task)
def singleChunk(self, acdc, acdcInfo, task):
    """Return a single block (inside a list) with all associated ACDC records.

    The whole ACDC fileset is fetched as one chunk; an empty chunk yields
    an empty list.
    """
    result = []
    acdcBlock = acdc.singleChunkFileset(acdcInfo['collection'],
                                        acdcInfo['fileset'],
                                        user = self.wmspec.getOwner().get("name"),
                                        group = self.wmspec.getOwner().get("group"))
    dbsBlock = {}
    # synthesize a block name encoding workflow/fileset/offset/size
    dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                      acdcInfo["fileset"],
                                      acdcBlock['offset'],
                                      acdcBlock['files'])
    dbsBlock['NumberOfFiles'] = acdcBlock['files']
    dbsBlock['NumberOfEvents'] = acdcBlock['events']
    dbsBlock['NumberOfLumis'] = acdcBlock['lumis']
    if task.inputLocationFlag():
        # spec pins the site list; ignore ACDC locations
        dbsBlock["Sites"] = self.sites
    else:
        dbsBlock["Sites"] = sitesFromStorageEelements(acdcBlock["locations"])
    dbsBlock['ACDC'] = acdcInfo
    # skip empty chunks - they carry no work
    if dbsBlock['NumberOfFiles']:
        result.append(dbsBlock)
    return result
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Applies block and run white/black lists to every block of the task's
    input dataset, scales the block's file/event counts by the fraction of
    accepted lumi sections, and records (as a side effect) the sites that
    host ALL accepted blocks in ``self.data[datasetPath]``.
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of block locations

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            # FIX: the two counters were initialised twice in a row;
            # the redundant duplicate statements have been removed.
            acceptedLumiCount = 0
            fullLumiCount = 0
            for run in runLumis:
                if run in runs:
                    acceptedLumiCount += runLumis[run]
                fullLumiCount += runLumis[run]
            ratioAccepted = float(acceptedLumiCount) / fullLumiCount
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = int(
                float(block['NumberOfFiles']) * ratioAccepted)
            block['NumberOfEvents'] = int(
                float(block['NumberOfEvents']) * ratioAccepted)

        validBlocks.append(block)
        # narrow the common-location set with this block's locations
        if locations is None:
            locations = set(sitesFromStorageEelements(
                dbs.listFileBlockLocation(block['block'])))
        else:
            locations = locations.intersection(set(sitesFromStorageEelements(
                dbs.listFileBlockLocation(block['block']))))

    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Resolves the input block list (from ``self.data``, the block whitelist
    or the input dataset), filters it by block/run white- and black-lists,
    an optional lumi mask and the internal reject/blacklist bookkeeping,
    recomputes effective block sizes where filtering changed them, and
    stores each accepted block's site list in ``self.data``.
    Rejected block names are accumulated in ``self.rejectedWork``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if blockName in self.blockBlackListModifier:
            # Don't duplicate blocks rejected before or blocks that were
            # included and therefore are now in the blacklist
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            self.rejectedWork.append(blockName)
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file].getLumis()) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip
                # recalculating lumi counts
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        runNumber = lumiInfo['RunNumber']
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale events by lumi ratio
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

        # TODO: need to decide what to do when location is no find.
        # There could be case for network problem (no connection to dbs, phedex)
        # or DBS se is not recorded (This will be retried anyway by location mapper)
        if not self.data[block['block']]:
            self.data[block['block']] = ["NoInitialSite"]
        #    # No sites for this block, move it to rejected
        #    self.rejectedWork.append(blockName)
        #    continue

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Gathers block info (from ``self.data`` or the whole input dataset),
    drops empty and black-listed blocks, applies run white/black lists
    with a ratio-based resize, fetches lumi counts when the slice type
    needs them, and stores each accepted block's site list in
    ``self.data``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        self.data = {datasetPath : []}  # same structure as in WorkQueueElement
        #blocks = dbs.getFileBlocksInfo(datasetPath, locations = False)
    #else:
        #dataItems = self.data.keys()

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.extend(dbs.getFileBlocksInfo(datasetPath,
                                                blockName = str(data),
                                                locations = True))
        else:
            Lexicon.dataset(data)  # check dataset name
            blocks.extend(dbs.getFileBlocksInfo(datasetPath, locations = True))

    for block in blocks:
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles']:
            continue

        # check block restrictions
        if blockWhiteList and block['Name'] not in blockWhiteList:
            continue
        if block['Name'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            full_lumi_list = dbs.listRuns(block = block['Name'])
            runs = set(full_lumi_list)
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            accepted_lumis = [x for x in full_lumi_list if x in runs]
            ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
            block[self.lumiType] = len(accepted_lumis)
            block['NumberOfFiles'] *= ratio_accepted
            block['NumberOfEvents'] *= ratio_accepted

        # get lumi info if needed and not already available
        if self.args['SliceType'] == self.lumiType and not block.get(self.lumiType):
            blockSummary = dbs.getDBSSummaryInfo(block = block["Name"])
            block[self.lumiType] = blockSummary[self.lumiType]

        # save locations
        self.data[block['Name']] = sitesFromStorageEelements([x['Name'] for x in block['StorageElementList']])

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Resolves the input block list (from ``self.data``, the block whitelist
    or the input dataset), filters it by block/run white- and black-lists,
    an optional lumi mask and the internal reject/blacklist bookkeeping,
    recomputes effective block sizes where filtering changed them, and
    stores each accepted block's site list in ``self.data``.
    Rejected block names are accumulated in ``self.rejectedWork``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find("#") > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split("#")[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if blockName in self.blockBlackListModifier:
            # Don't duplicate blocks rejected before or blocks that were
            # included and therefore are now in the blacklist
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block["NumberOfFiles"] or block["NumberOfFiles"] == "0":
            self.rejectedWork.append(blockName)
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block["NumberOfFiles"] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1.0 * accepted_lumis / float(block["NumberOfLumis"])
            block["NumberOfEvents"] = float(block["NumberOfEvents"]) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block["block"])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip
                # recalculating lumi counts
                recalculateLumiCounts = False

            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block["block"])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry["LumiList"]:
                        runNumber = lumiInfo["RunNumber"]
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry["LumiList"]) != acceptedFileLumiCount:
                            # partially accepted file: scale events by lumi ratio
                            acceptedEventCount += (
                                float(acceptedFileLumiCount) *
                                fileEntry["NumberOfEvents"] /
                                len(fileEntry["LumiList"])
                            )
                        else:
                            acceptedEventCount += fileEntry["NumberOfEvents"]
                block[self.lumiType] = acceptedLumiCount
                block["NumberOfFiles"] = acceptedFileCount
                block["NumberOfEvents"] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block["block"]] = self.sites
        else:
            self.data[block["block"]] = sitesFromStorageEelements(dbs.listFileBlockLocation(block["block"]))

        # TODO: need to decide what to do when location is no find.
        # There could be case for network problem (no connection to dbs, phedex)
        # or DBS se is not recorded (This will be retried anyway by location mapper)
        if not self.data[block["block"]]:
            self.data[block["block"]] = ["NoInitialSite"]
        #    # No sites for this block, move it to rejected
        #    self.rejectedWork.append(blockName)
        #    continue

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Candidate blocks come either from self.data (when already set) or are
    seeded from the task's block whitelist / input dataset.  Each candidate
    is filtered by the block white/black lists and, when present, by the
    task lumi mask or the run white/black lists; surviving blocks get their
    file/event/lumi counts rescaled accordingly.  Locations for each valid
    block are stored in self.data — taken from the site white/black lists
    when task.inputLocationFlag() is set, otherwise looked up in DBS.

    :param task: task object providing input dataset path, lumi mask and
        the block/run/site restriction lists
    :param dbs: DBS reader used for block, run, file and location lookups
    :returns: list of block summary dicts passing all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        if siteWhitelist:
            # Just get the SEs matching the whitelists
            self.sites = siteWhitelist
        elif siteBlacklist:
            # Get all CMS sites less the blacklist
            allSites = cmsSiteNames()
            self.sites = list(set(allSites) - set(siteBlacklist))
        else:
            # Run at any CMS site
            self.sites = cmsSiteNames()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    # Expand self.data entries into a flat list of block names: entries
    # containing '#' are already block names, anything else is a dataset
    # whose blocks are listed from DBS
    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        # NOTE: the '0' string comparison suggests DBS may return counts as
        # strings here — confirm against the DBS reader implementation
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        # check lumi restrictions
        if task.getLumiMask():
            # total lumi sections in this block that survive the mask
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip recalculating lumi counts
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        runNumber = lumiInfo['RunNumber']
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale its events by the
                            # fraction of accepted lumi sections
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))
        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction"""
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    # With a lumi mask, only the blocks overlapping the mask are relevant
    if task.getLumiMask():
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    # Seed self.data from the spec when nothing was passed in
    if not self.data:
        if blockWhiteList:
            self.data = dict((whiteBlock, []) for whiteBlock in blockWhiteList)
        else:
            # same structure as in WorkQueueElement
            self.data = {datasetPath : []}

    # Expand the data inputs into a flat list of candidate block names:
    # entries containing '#' are block names already, anything else is a
    # dataset whose blocks are listed from DBS
    candidateBlocks = []
    for inputData in self.data:
        if '#' in inputData:
            Lexicon.block(inputData)  # check block name
            datasetPath = str(inputData.split('#')[0])
            candidateBlocks.append(str(inputData))
        else:
            Lexicon.dataset(inputData)  # check dataset name
            for dbsBlock in dbs.listFileBlocks(inputData):
                candidateBlocks.append(str(dbsBlock))

    for blockName in candidateBlocks:
        # apply block white/black lists
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        if task.getLumiMask():
            # estimate the effective block size from the masked-block info
            maskedFiles = maskedBlocks[blockName]
            acceptedLumis = sum(len(maskedFiles[maskedFile]) for maskedFile in maskedFiles)
            block['NumberOfFiles'] = len(maskedFiles)
            # fraction of the block's lumi sections that survive the mask
            lumiFraction = 1. * acceptedLumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * lumiFraction
            block[self.lumiType] = acceptedLumis
        elif runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            lumiRuns = dbs.listRuns(block = block['block'])
            survivingRuns = set(lumiRuns).difference(runBlackList)
            if runWhiteList:
                survivingRuns = survivingRuns.intersection(runWhiteList)
            # nothing left to run on -> ignore the block
            if not survivingRuns:
                continue
            # Guess new event/file counts from the ratio of accepted lumi
            # sections (avoids pulling per-file information from DBS)
            acceptedLumis = [lumi for lumi in lumiRuns if lumi in survivingRuns]
            lumiFraction = 1. * len(acceptedLumis) / len(lumiRuns)
            block[self.lumiType] = len(acceptedLumis)
            block['NumberOfFiles'] = float(block['NumberOfFiles']) * lumiFraction
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * lumiFraction

        # record where the block is hosted
        self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))
        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Walks every block of the task's input dataset, applies the block and
    run white/black lists, rescales the block's lumi/file/event counts
    when run filtering removed data, and records in self.data the set of
    sites hosting *all* surviving blocks.

    :param task: task object providing the input dataset path and the
        block/run/site restriction lists
    :param dbs: DBS reader used to list blocks, summaries, runs, files
        and block locations
    :returns: list of block summary dicts that pass all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # intersection of the locations of all valid blocks
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    siteWhiteList = task.siteWhitelist()
    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue
        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # With more than one run in the block the lumi counts may need
            # recomputing after filtering the run list; that requires an
            # expensive DBS file-information call, so it is skipped below
            # when filtering removed nothing
            recalculateLumiCounts = len(runs) > 1
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # no runs were filtered out, block counts are already correct
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block:
                # pull out per-file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale its events by the
                            # fraction of accepted lumi sections
                            acceptedEventCount += float(acceptedFileLumiCount) * \
                                fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
            else:
                acceptedLumiCount = block["NumberOfLumis"]
                acceptedFileCount = block['NumberOfFiles']
                acceptedEventCount = block['NumberOfEvents']
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = acceptedFileCount
            block['NumberOfEvents'] = acceptedEventCount
        validBlocks.append(block)
        # accumulate the sites common to every valid block so far
        blockSites = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        if locations is None:
            locations = blockSites
        else:
            locations = locations.intersection(blockSites)
        if self.wmspec.locationDataSourceFlag():
            locations = locations.union(siteWhiteList)
    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Iterates over the blocks of the task's input dataset, applies the
    block and run white/black lists, recalculates the block's
    lumi/file/event counts when run filtering removed data, and stores
    in self.data the sites that host every surviving block.

    :param task: task object providing the input dataset path and the
        block/run/site restriction lists
    :param dbs: DBS reader used to list blocks, summaries, runs, files
        and block locations
    :returns: list of block summary dicts that pass all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of valid-block locations
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    siteWhiteList = task.siteWhitelist()
    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue
        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block = block['block'])
            runs = set(runLumis.keys())
            # Recomputing lumi counts after run filtering needs an expensive
            # DBS file-information call; only consider it when the block has
            # more than one run, and skip it (below) when filtering removed
            # no runs at all
            recalculateLumiCounts = len(runs) > 1
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # run list unchanged: summary counts are already correct
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block from per-file info
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName = block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale events by the
                            # fraction of its lumi sections that survive
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
            else:
                acceptedLumiCount = block["NumberOfLumis"]
                acceptedFileCount = block['NumberOfFiles']
                acceptedEventCount = block['NumberOfEvents']
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = acceptedFileCount
            block['NumberOfEvents'] = acceptedEventCount
        validBlocks.append(block)
        # intersect this block's sites with those of previous valid blocks
        blockSites = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        if locations is None:
            locations = blockSites
        else:
            locations = locations.intersection(blockSites)
        if self.wmspec.locationDataSourceFlag():
            locations = locations.union(siteWhiteList)
    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks