def split(self):
    """Apply policy to spec.

    Builds the site list from the initial task's white and black lists,
    then registers one queue element for every block returned by
    ``self.validBlocks``.
    """
    task = self.initialTask

    # Prepare a site list in case we need it
    self.sites = makeLocationsList(task.siteWhitelist(), task.siteBlacklist())

    for blockInfo in self.validBlocks(task):
        # Parent data is only filled in when the task requests parent processing
        withParents = bool(task.parentProcessingFlag())
        parentData = {blockInfo["Name"]: blockInfo['Sites']} if withParents else {}

        self.newQueueElement(
            Inputs={blockInfo['Name']: blockInfo['Sites']},
            ParentFlag=withParents,
            ParentData=parentData,
            NumberOfLumis=blockInfo[self.lumiType],
            NumberOfFiles=blockInfo['NumberOfFiles'],
            NumberOfEvents=blockInfo['NumberOfEvents'],
            # block size expressed in slices, rounded up
            Jobs=ceil(float(blockInfo[self.args['SliceType']]) / float(self.args['SliceSize'])),
            ACDC=blockInfo['ACDC'],
            NoInputUpdate=task.getTrustSitelists().get('trustlists'),
            NoPileupUpdate=task.getTrustSitelists().get('trustPUlists'),
        )
def split(self):
    """Apply policy to spec.

    The site list is computed once from the initial task; afterwards each
    block handed back by ``self.validBlocks`` becomes one queue element.
    """
    # Prepare a site list in case we need it
    whiteList = self.initialTask.siteWhitelist()
    blackList = self.initialTask.siteBlacklist()
    self.sites = makeLocationsList(whiteList, blackList)

    for entry in self.validBlocks(self.initialTask):
        if self.initialTask.parentProcessingFlag():
            hasParents, parents = True, {entry["Name"]: entry['Sites']}
        else:
            hasParents, parents = False, {}

        # Values of a dict display are evaluated in order, just like the
        # keyword arguments of a direct call would be.
        elementArgs = {
            'Inputs': {entry['Name']: entry['Sites']},
            'ParentFlag': hasParents,
            'ParentData': parents,
            'NumberOfLumis': entry[self.lumiType],
            'NumberOfFiles': entry['NumberOfFiles'],
            'NumberOfEvents': entry['NumberOfEvents'],
            # number of slices needed to cover the block, rounded up
            'Jobs': ceil(float(entry[self.args['SliceType']]) / float(self.args['SliceSize'])),
            'ACDC': entry['ACDC'],
            'NoInputUpdate': self.initialTask.getTrustSitelists().get('trustlists'),
            'NoPileupUpdate': self.initialTask.getTrustSitelists().get('trustPUlists'),
        }
        self.newQueueElement(**elementArgs)
def split(self):
    """Apply policy to spec.

    Computes the site list from the initial task and creates one queue
    element per block returned by ``self.validBlocks``.
    """
    task = self.initialTask

    # Prepare a site list in case we need it
    self.sites = makeLocationsList(task.siteWhitelist(), task.siteBlacklist())

    for blockInfo in self.validBlocks(task):
        needsParents = bool(task.parentProcessingFlag())
        # block size expressed in slices, rounded up
        jobCount = ceil(float(blockInfo[self.args['SliceType']]) / float(self.args['SliceSize']))

        self.newQueueElement(
            Inputs={blockInfo['Name']: blockInfo['Sites']},
            ParentFlag=needsParents,
            NumberOfLumis=blockInfo[self.lumiType],
            NumberOfFiles=blockInfo['NumberOfFiles'],
            NumberOfEvents=blockInfo['NumberOfEvents'],
            Jobs=jobCount,
            ACDC=blockInfo['ACDC'],
        )
def __call__(self, wmTask):
    """
    Method is called when WorkQueue creates the sandbox for a job.

    Looks at every step of the task and, for each step whose data carries a
    "pileup" attribute, creates the pileup configuration file.

    wmTask is instance of WMTask.WMTaskHelper
    """
    # When the task trusts its site lists for pileup, the PU data location is
    # overlooked and the task's own site list is passed on instead.
    if wmTask.getTrustSitelists().get('trustPUlists'):
        pileupSites = makeLocationsList(wmTask.siteWhitelist(), wmTask.siteBlacklist())
    else:
        pileupSites = []

    for node in wmTask.steps().nodeIterator():
        stepHelper = WMStep.WMStepHelper(node)
        # steps without a pileup section need no configuration file
        if not hasattr(stepHelper.data, "pileup"):
            continue
        self._createPileupConfigFile(stepHelper, pileupSites)
def __call__(self, wmTask):
    """
    Method is called when WorkQueue creates the sandbox for a job.

    Looks at every step of the task and, for each step whose data carries a
    "pileup" attribute, creates the pileup configuration file.

    wmTask is instance of WMTask.WMTaskHelper
    """
    # getTrustSitelists() can hand back a mapping of flags. A non-empty
    # mapping is always truthy, so testing it directly would fake the pileup
    # location for every task. Read the pileup flag explicitly, and keep
    # accepting a plain boolean return value.
    trustFlags = wmTask.getTrustSitelists()
    if hasattr(trustFlags, 'get'):
        trustPileup = trustFlags.get('trustPUlists')
    else:
        trustPileup = trustFlags

    fakeSites = []
    # check whether we need to pretend PU data location
    if trustPileup:
        fakeSites = makeLocationsList(wmTask.siteWhitelist(), wmTask.siteBlacklist())

    for step in wmTask.steps().nodeIterator():
        helper = WMStep.WMStepHelper(step)
        if hasattr(helper.data, "pileup"):
            self._createPileupConfigFile(helper, fakeSites)
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Every block that ``dbs`` lists for the task's input dataset is checked
    against the block white/black lists, then against either the lumi mask
    or the run white/black lists. The file, event, lumi and run counts of
    each surviving block summary are rewritten to reflect the restriction.

    Side effects: blocks without files are appended to ``self.badWork``;
    blocks removed by the lumi mask or run lists are appended to
    ``self.rejectedWork``; ``self.data[datasetPath]`` is set to the task's
    site list when the task trusts its site lists, otherwise to the sites
    common to all accepted blocks (if any).

    :param task: task whose input dataset and restrictions are applied
    :param dbs: reader used for block, run, file and location lookups
    :return: list of accepted block summaries
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    acceptedBlocks = []
    commonLocations = None

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    lumiMask = task.getLumiMask()
    if lumiMask:
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    for blockName in dbs.listFileBlocks(datasetPath):
        # check block restrictions
        if (blockWhiteList and blockName not in blockWhiteList) or blockName in blockBlackList:
            continue

        summary = dbs.getDBSSummaryInfo(block=blockName)
        if int(summary.get('NumberOfFiles', 0)) == 0:
            logging.warning("Block %s being rejected for lack of valid files to process", blockName)
            self.badWork.append(blockName)
            continue

        if self.args['SliceType'] == 'NumberOfRuns':
            summary['NumberOfRuns'] = dbs.listRuns(block=blockName)

        if lumiMask:
            # check lumi restrictions
            if blockName not in maskedBlocks:
                logging.warning("Block %s doesn't pass the lumi mask constraints", blockName)
                self.rejectedWork.append(blockName)
                continue

            maskedFiles = maskedBlocks[blockName]
            lumiCount = sum(len(maskedFiles[lfn].getLumis()) for lfn in maskedFiles)
            # events are scaled by the fraction of the block's lumis that were accepted
            ratioAccepted = 1. * lumiCount / float(summary['NumberOfLumis'])
            runsPerFile = [maskedFiles[lfn].getRuns() for lfn in maskedFiles]
            acceptedRuns = set(lumiMask.getRuns()).intersection(set().union(*runsPerFile))

            summary['NumberOfFiles'] = len(maskedFiles)
            summary['NumberOfEvents'] = float(summary['NumberOfEvents']) * ratioAccepted
            summary[self.lumiType] = lumiCount
            summary['NumberOfRuns'] = acceptedRuns
        elif runWhiteList or runBlackList:
            # check run restrictions
            runs = set(dbs.listRuns(block=blockName))
            # multi run blocks need special account, requires more DBS calls
            multiRunBlock = len(runs) > 1

            # apply blacklist and whitelist
            runs = runs.difference(runBlackList)
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                logging.warning("Block %s doesn't pass the runs constraints", blockName)
                self.rejectedWork.append(blockName)
                continue

            if multiRunBlock:
                # Recalculate the number of files, lumis and ~events accepted
                lumiCount = 0
                eventCount = 0
                fileCount = 0
                for fileEntry in dbs.listFilesInBlock(fileBlockName=blockName):
                    lumisInRuns = [len(lumiInfo['LumiSectionNumber'])
                                   for lumiInfo in fileEntry['LumiList']
                                   if lumiInfo['RunNumber'] in runs]
                    if lumisInRuns:
                        # a file counts in full as soon as one of its runs is accepted
                        lumiCount += sum(lumisInRuns)
                        fileCount += 1
                        eventCount += fileEntry['NumberOfEvents']
            else:
                lumiCount = summary["NumberOfLumis"]
                fileCount = summary['NumberOfFiles']
                eventCount = summary['NumberOfEvents']

            summary[self.lumiType] = lumiCount
            summary['NumberOfFiles'] = fileCount
            summary['NumberOfEvents'] = eventCount
            summary['NumberOfRuns'] = runs

        acceptedBlocks.append(summary)

        # keep only the locations shared by every accepted block so far
        blockLocations = dbs.listFileBlockLocation(blockName)
        if commonLocations is None:
            commonLocations = set(blockLocations)
        else:
            commonLocations = commonLocations.intersection(blockLocations)

    # all needed blocks present at these sites
    if task.getTrustSitelists().get('trustlists'):
        self.sites = makeLocationsList(task.siteWhitelist(), task.siteBlacklist())
        self.data[datasetPath] = self.sites
    elif commonLocations:
        self.data[datasetPath] = list(set(self.cric.PNNstoPSNs(commonLocations)))

    return acceptedBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    The candidate blocks come from ``self.data``. When that is empty it is
    seeded from the block whitelist or, failing that, from the task's input
    dataset. Keys containing '#' are taken as block names; any other key is
    taken as a dataset whose closed blocks are listed through ``dbs``.

    Each candidate is checked against the block white/black lists,
    ``self.blockBlackListModifier`` and the lumi mask, then against the run
    white/black lists when no lumi mask is set. Blocks filtered out by the
    lumi mask, by the run lists, or for having no files are appended to
    ``self.rejectedWork``. For every accepted block, ``self.data`` receives
    the block's sites (``["NoInitialSite"]`` when none are found).

    :param task: task whose input dataset and restrictions are applied
    :param dbs: reader used for block, run, file and location lookups
    :return: list of accepted block summaries with adjusted counts
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    # Task-level settings do not change while looping over the blocks, so
    # they are read once instead of once (or more) per block.
    lumiMask = task.getLumiMask()
    if lumiMask:
        # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    trustSiteLists = task.getTrustSitelists().get('trustlists')
    if trustSiteLists:
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if blockName in self.blockBlackListModifier:
            # Don't duplicate blocks rejected before or blocks that were
            # included and therefore are now in the blacklist
            continue
        if lumiMask and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            self.rejectedWork.append(blockName)
            continue

        # check lumi restrictions
        if lumiMask:
            accepted_lumis = sum(len(maskedBlocks[blockName][lfn].getLumis())
                                 for lfn in maskedBlocks[blockName])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            # Coerce to float explicitly: integer operands would truncate the
            # ratio under floor division, and counts may arrive as strings.
            ratioAccepted = float(accepted_lumis) / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # If more than one run in the block then we must calculate the
            # lumi counts after filtering the run list. This has to be done
            # rarely and requires calling DBS file information.
            recalculateLumiCounts = len(runs) > 1

            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue

            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip recalculating lumi counts
                recalculateLumiCounts = False

            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                            acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                    if acceptedFile:
                        acceptedFileCount += 1
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # only part of the file is accepted: scale its
                            # events by the accepted share, in floating point
                            acceptedEventCount += (float(acceptedFileLumiCount)
                                                   * fileEntry['NumberOfEvents']
                                                   / len(fileEntry['LumiList']))
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if trustSiteLists:
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = self.siteDB.PNNstoPSNs(dbs.listFileBlockLocation(block['block']))

        # TODO: need to decide what to do when no location is found.
        # There could be a network problem (no connection to dbs, phedex)
        # or the DBS se is not recorded (this will be retried anyway by the
        # location mapper). For now such blocks are kept, not rejected.
        if not self.data[block['block']]:
            self.data[block['block']] = ["NoInitialSite"]

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Candidate blocks are taken from ``self.data``, which is seeded from the
    block whitelist or the task's input dataset when empty. Each candidate
    is filtered by the block lists, ``self.blockBlackListModifier``, the
    lumi mask and the run lists. Blocks dropped by the lumi mask, by the run
    lists, or for having no files end up in ``self.rejectedWork``. Accepted
    blocks get their sites recorded in ``self.data``.

    :param task: task whose input dataset and restrictions are applied
    :param dbs: reader used for block, run, file and location lookups
    :return: list of accepted block summaries with adjusted counts
    """
    datasetPath = task.getInputDatasetPath()
    accepted = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    if task.getLumiMask():
        # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    if task.getTrustSitelists().get('trustlists'):
        self.sites = makeLocationsList(task.siteWhitelist(), task.siteBlacklist())

    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((name, []) for name in blockWhiteList)
        else:
            # same structure as in WorkQueueElement
            self.data = {datasetPath: []}

    candidates = []
    for item in self.data:
        if item.find('#') > -1:
            Lexicon.block(item)  # check block name
            datasetPath = str(item.split('#')[0])
            candidates.append(str(item))
        else:
            Lexicon.dataset(item)  # check dataset name
            candidates.extend(str(name) for name in dbs.listFileBlocks(item, onlyClosedBlocks=True))

    for blockName in candidates:
        # check block restrictions; the modifier list avoids duplicating
        # blocks rejected before or blocks that were already included
        if ((blockWhiteList and blockName not in blockWhiteList)
                or blockName in blockBlackList
                or blockName in self.blockBlackListModifier):
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        summary = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not summary['NumberOfFiles'] or summary['NumberOfFiles'] == '0':
            self.rejectedWork.append(blockName)
            continue

        if task.getLumiMask():
            # check lumi restrictions
            lumisAccepted = sum(len(maskedBlocks[blockName][lfn].getLumis())
                                for lfn in maskedBlocks[blockName])
            # use the information given from getMaskedBlocks to compute the size of the block
            summary['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * lumisAccepted / float(summary['NumberOfLumis'])
            summary['NumberOfEvents'] = float(summary['NumberOfEvents']) * ratioAccepted
            summary[self.lumiType] = lumisAccepted
        elif runWhiteList or runBlackList:
            # check run restrictions
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=summary['block'])
            runs = set(runLumis.keys())
            # Only blocks with more than one run can need their counts
            # recalculated; this is rare and requires DBS file information.
            multiRunBlock = len(runs) > 1

            # apply blacklist, then accept only whitelisted runs if a whitelist is given
            runs = runs.difference(runBlackList)
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue

            # If there is no change in the runs, the recalculation is skipped
            if multiRunBlock and len(runs) != len(runLumis):
                # Recalculate effective size of block
                lumiTotal = 0
                eventTotal = 0
                fileTotal = 0
                for fileEntry in dbs.listFilesInBlock(fileBlockName=summary['block']):
                    matching = [lumiInfo for lumiInfo in fileEntry['LumiList']
                                if lumiInfo['RunNumber'] in runs]
                    if not matching:
                        continue
                    lumiTotal += sum(len(lumiInfo['LumiSectionNumber']) for lumiInfo in matching)
                    fileTotal += 1
                    if len(fileEntry['LumiList']) != len(matching):
                        # partially accepted file: scale its events by the accepted share
                        eventTotal += float(len(matching)) * fileEntry['NumberOfEvents'] \
                                      / len(fileEntry['LumiList'])
                    else:
                        eventTotal += fileEntry['NumberOfEvents']
                summary[self.lumiType] = lumiTotal
                summary['NumberOfFiles'] = fileTotal
                summary['NumberOfEvents'] = eventTotal

        # save locations
        blockKey = summary['block']
        if task.getTrustSitelists().get('trustlists'):
            self.data[blockKey] = self.sites
        else:
            self.data[blockKey] = self.siteDB.PNNstoPSNs(dbs.listFileBlockLocation(blockKey))

        # TODO: need to decide what to do when no location is found.
        # There could be a network problem (no connection to dbs, phedex)
        # or the DBS se is not recorded (this will be retried anyway by the
        # location mapper). For now such blocks are kept, not rejected.
        if not self.data[blockKey]:
            self.data[blockKey] = ["NoInitialSite"]

        accepted.append(summary)
    return accepted
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Every block that ``dbs`` lists for the task's input dataset is checked
    against the block white/black lists and, when run lists are set, against
    the run white/black lists. For blocks spanning several runs, the file,
    lumi and event counts are recomputed from the files of the block.

    As a side effect ``self.data[datasetPath]`` is set to ``self.sites`` when
    the spec trusts its site lists, otherwise to the sites common to all
    accepted blocks (if any).

    :param task: task whose input dataset and restrictions are applied
    :param dbs: reader used for block, run, file and location lookups
    :return: list of accepted block summaries with adjusted counts
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    # The trust getters can hand back a mapping of flags. A non-empty mapping
    # is always truthy, so the input-data flag is read explicitly; a plain
    # boolean return value is still accepted as is.
    taskTrust = task.getTrustSitelists()
    if hasattr(taskTrust, 'get'):
        taskTrust = taskTrust.get('trustlists')
    if taskTrust:
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # If more than one run in the block then we must calculate the
            # lumi counts after filtering the run list. This has to be done
            # rarely and requires calling DBS file information.
            recalculateLumiCounts = len(runs) > 1

            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale its events by the accepted share
                            acceptedEventCount += (float(acceptedFileLumiCount)
                                                   * fileEntry['NumberOfEvents']
                                                   / len(fileEntry['LumiList']))
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
            else:
                # single-run block that survived the filter: counts are unchanged
                acceptedLumiCount = block["NumberOfLumis"]
                acceptedFileCount = block['NumberOfFiles']
                acceptedEventCount = block['NumberOfEvents']

            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = acceptedFileCount
            block['NumberOfEvents'] = acceptedEventCount

        validBlocks.append(block)
        # keep only the locations shared by every accepted block so far
        if locations is None:
            locations = set(dbs.listFileBlockLocation(block['block']))
        else:
            locations = locations.intersection(dbs.listFileBlockLocation(block['block']))

    # all needed blocks present at these sites
    specTrust = self.wmspec.getTrustLocationFlag()
    if hasattr(specTrust, 'get'):
        specTrust = specTrust.get('trustlists')
    if specTrust:
        self.data[datasetPath] = self.sites
    elif locations:
        self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(locations)))

    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Each block that ``dbs`` lists for the task's input dataset is filtered
    by the block white/black lists and, when set, by the run white/black
    lists. Blocks spanning several runs get their file, lumi and event
    counts recomputed from the block's files.

    As a side effect ``self.data[datasetPath]`` is set to ``self.sites`` when
    the spec trusts its site lists, otherwise to the sites common to all
    accepted blocks (if any).

    :param task: task whose input dataset and restrictions are applied
    :param dbs: reader used for block, run, file and location lookups
    :return: list of accepted block summaries with adjusted counts
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    accepted = []
    sharedLocations = None

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    if task.getTrustSitelists().get("trustlists"):
        self.sites = makeLocationsList(task.siteWhitelist(), task.siteBlacklist())

    for blockName in dbs.listFileBlocks(datasetPath):
        summary = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

        # check block restrictions
        if (blockWhiteList and summary["block"] not in blockWhiteList) or summary["block"] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=summary["block"])
            runs = set(runLumis.keys())
            # With more than one run in the block the counts must be
            # recalculated after filtering the run list; this is rare and
            # requires calling DBS file information.
            multiRunBlock = len(runs) > 1

            # apply blacklist, then accept only whitelisted runs if a whitelist is given
            runs = runs.difference(runBlackList)
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            if multiRunBlock:
                # Recalculate effective size of block from its file information
                lumiTotal = 0
                eventTotal = 0
                fileTotal = 0
                for fileEntry in dbs.listFilesInBlock(fileBlockName=summary["block"]):
                    matched = sum(1 for lumiInfo in fileEntry["LumiList"]
                                  if lumiInfo["RunNumber"] in runs)
                    if not matched:
                        continue
                    fileTotal += 1
                    lumiTotal += matched
                    if len(fileEntry["LumiList"]) != matched:
                        # partially accepted file: scale its events by the accepted share
                        eventTotal += (
                            float(matched) * fileEntry["NumberOfEvents"] / len(fileEntry["LumiList"])
                        )
                    else:
                        eventTotal += fileEntry["NumberOfEvents"]
            else:
                # single-run block that survived the filter: counts are unchanged
                lumiTotal = summary["NumberOfLumis"]
                fileTotal = summary["NumberOfFiles"]
                eventTotal = summary["NumberOfEvents"]

            summary[self.lumiType] = lumiTotal
            summary["NumberOfFiles"] = fileTotal
            summary["NumberOfEvents"] = eventTotal

        accepted.append(summary)

        # keep only the locations shared by every accepted block so far
        blockLocations = dbs.listFileBlockLocation(summary["block"])
        if sharedLocations is None:
            sharedLocations = set(blockLocations)
        else:
            sharedLocations = sharedLocations.intersection(blockLocations)

    # all needed blocks present at these sites
    if self.wmspec.getTrustLocationFlag().get("trustlists"):
        self.data[datasetPath] = self.sites
    elif sharedLocations:
        self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(sharedLocations)))

    return accepted