def validBlocks(self, task):
    """Return blocks that pass the input data restriction.

    Two modes depending on ``self.data``:
      * if it already names an ACDC block, fetch that single chunk's
        summary from the ACDC data-collection service;
      * otherwise split the whole ACDC fileset into fixed-size chunks.
    Each returned entry is a dict shaped like a DBS block summary
    (Name / NumberOfFiles / NumberOfEvents / NumberOfLumis / Sites / ACDC).
    """
    validBlocks = []
    # TODO take the chunk size from parameter
    chunkSize = 200

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        # ACDC-based specs must carry an acdc section; fail loudly otherwise
        raise WorkQueueWMSpecError(
            self.wmspec, 'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
    else:
        # if self.data is not passed, assume the data is the input dataset
        # from the spec
        acdcBlockSplit = False

    if acdcBlockSplit:
        dbsBlock = {}
        dbsBlock['Name'] = self.data.keys()[0]
        block = acdc.getChunkInfo(
            acdcInfo['collection'],
            acdcBlockSplit['TaskName'],
            acdcBlockSplit['Offset'],
            acdcBlockSplit['NumOfFiles'],
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        dbsBlock['ACDC'] = acdcInfo
        # NOTE: the helper name is (mis)spelled this way in the module
        dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        validBlocks.append(dbsBlock)
    else:
        acdcBlocks = acdc.chunkFileset(
            acdcInfo['collection'],
            acdcInfo['fileset'],
            chunkSize,
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        for block in acdcBlocks:
            dbsBlock = {}
            # synthesize a block name encoding workflow/fileset/offset/size
            dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                              acdcInfo["fileset"],
                                              block['offset'],
                                              block['files'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            dbsBlock['ACDC'] = acdcInfo
            validBlocks.append(dbsBlock)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Walks every block of the task's input dataset, applies block and run
    white/black lists, estimates the effective block size after run
    filtering, and records (as a side effect) the set of sites that host
    ALL accepted blocks in ``self.data[datasetPath]``.
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of block locations

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            full_lumi_list = dbs.listRuns(block = block['block'])
            runs = set(full_lumi_list)
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            accepted_lumis = [x for x in full_lumi_list if x in runs]
            ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
            block[self.lumiType] = len(accepted_lumis)
            block['NumberOfFiles'] = float(block['NumberOfFiles']) * ratio_accepted
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratio_accepted

        validBlocks.append(block)
        # track sites common to every accepted block so far
        if locations is None:
            locations = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        else:
            locations = locations.intersection(set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))))

    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task):
    """Return blocks that pass the input data restriction"""
    # TODO take the chunk size from parameter
    chunkSize = 200

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        msg = 'No acdc section for %s' % task.getPathName()
        raise WorkQueueWMSpecError(self.wmspec, msg)

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    # if self.data is not passed, assume the data is the input dataset
    # from the spec
    acdcBlockSplit = False
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])

    validBlocks = []
    if acdcBlockSplit:
        # A single named ACDC block was requested: look up its chunk info.
        chunk = acdc.getChunkInfo(acdcInfo['collection'],
                                  acdcBlockSplit['TaskName'],
                                  acdcBlockSplit['Offset'],
                                  acdcBlockSplit['NumOfFiles'],
                                  user = self.wmspec.getOwner().get("name"),
                                  group = self.wmspec.getOwner().get("group"))
        validBlocks.append({'Name': self.data.keys()[0],
                            'NumberOfFiles': chunk['files'],
                            'NumberOfEvents': chunk['events'],
                            'NumberOfLumis': chunk['lumis'],
                            'ACDC': acdcInfo,
                            'Sites': sitesFromStorageEelements(chunk["locations"])})
    else:
        # No explicit block: chunk the whole fileset.
        chunks = acdc.chunkFileset(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   chunkSize,
                                   user = self.wmspec.getOwner().get("name"),
                                   group = self.wmspec.getOwner().get("group"))
        for chunk in chunks:
            blockName = ACDCBlock.name(self.wmspec.name(),
                                       acdcInfo["fileset"],
                                       chunk['offset'],
                                       chunk['files'])
            validBlocks.append({'Name': blockName,
                                'NumberOfFiles': chunk['files'],
                                'NumberOfEvents': chunk['events'],
                                'NumberOfLumis': chunk['lumis'],
                                'Sites': sitesFromStorageEelements(chunk["locations"]),
                                'ACDC': acdcInfo})
    return validBlocks
def singleChunk(self, acdc, acdcInfo, task):
    """Return a single block (inside a list) with all associated ACDC records"""
    acdcBlock = acdc.singleChunkFileset(
        acdcInfo['collection'],
        acdcInfo['fileset'],
        user=self.wmspec.getOwner().get("name"),
        group=self.wmspec.getOwner().get("group"))

    # Sites come from the task's location flag or from the ACDC record
    if task.inputLocationFlag():
        sites = self.sites
    else:
        sites = sitesFromStorageEelements(acdcBlock["locations"])

    dbsBlock = {'Name': ACDCBlock.name(self.wmspec.name(),
                                       acdcInfo["fileset"],
                                       acdcBlock['offset'],
                                       acdcBlock['files']),
                'NumberOfFiles': acdcBlock['files'],
                'NumberOfEvents': acdcBlock['events'],
                'NumberOfLumis': acdcBlock['lumis'],
                'Sites': sites,
                'ACDC': acdcInfo}

    # An empty chunk carries no work: return an empty list
    return [dbsBlock] if dbsBlock['NumberOfFiles'] else []
def split(self):
    """Apply policy to spec.

    For every valid input block, create one work-queue element carrying
    the block's location, size estimates, parentage information and the
    number of jobs derived from SliceType/SliceSize.
    """
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        #set the parent flag for processing only for clarity on the couch doc
        parentList = {}
        parentFlag = False
        #TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for dbsBlock in dbs.listBlockParents(block["block"]):
                if self.initialTask.inputLocationFlag():
                    # honour the spec-pinned site list rather than DBS
                    parentList[dbsBlock["Name"]] = self.sites
                else:
                    parentList[dbsBlock["Name"]] = sitesFromStorageEelements(dbsBlock['StorageElementList'])

        self.newQueueElement(Inputs = {block['block'] : self.data.get(block['block'], [])},
                             ParentFlag = parentFlag,
                             ParentData = parentList,
                             NumberOfLumis = int(block[self.lumiType]),
                             NumberOfFiles = int(block['NumberOfFiles']),
                             NumberOfEvents = int(block['NumberOfEvents']),
                             # round up so partial slices still get a job
                             Jobs = ceil(float(block[self.args['SliceType']]) /
                                         float(self.args['SliceSize'])),
                             # DBS reports OpenForWriting as '1'/'0' strings
                             OpenForNewData = True if str(block.get('OpenForWriting')) == '1' else False,
                             NoLocationUpdate = self.initialTask.inputLocationFlag()
                             )
def split(self):
    """Apply policy to spec"""
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        # set the parent flag for processing only for clarity on the couch doc
        parentFlag = False
        parentList = {}
        # TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for parentBlock in dbs.listBlockParents(block["block"]):
                if self.initialTask.inputLocationFlag():
                    parentList[parentBlock["Name"]] = self.sites
                else:
                    parentList[parentBlock["Name"]] = sitesFromStorageEelements(parentBlock['StorageElementList'])

        sliceCount = float(block[self.args['SliceType']])
        sliceSize = float(self.args['SliceSize'])
        blockIsOpen = str(block.get('OpenForWriting')) == '1'

        self.newQueueElement(
            Inputs={block['block']: self.data.get(block['block'], [])},
            ParentFlag=parentFlag,
            ParentData=parentList,
            NumberOfLumis=int(block[self.lumiType]),
            NumberOfFiles=int(block['NumberOfFiles']),
            NumberOfEvents=int(block['NumberOfEvents']),
            Jobs=ceil(sliceCount / sliceSize),
            OpenForNewData=blockIsOpen,
            NoLocationUpdate=self.initialTask.inputLocationFlag())
def fixedSizeChunk(self, acdc, acdcInfo, task):
    """Return a set of blocks with a fixed number of ACDC records"""
    chunkSize = 250
    fixedSizeBlocks = []

    chunks = acdc.chunkFileset(acdcInfo['collection'],
                               acdcInfo['fileset'],
                               chunkSize,
                               user = self.wmspec.getOwner().get("name"),
                               group = self.wmspec.getOwner().get("group"))
    for chunk in chunks:
        # site list either pinned by the spec or taken from the ACDC record
        if task.inputLocationFlag():
            sites = self.sites
        else:
            sites = sitesFromStorageEelements(chunk["locations"])

        dbsBlock = {'Name': ACDCBlock.name(self.wmspec.name(),
                                           acdcInfo["fileset"],
                                           chunk['offset'],
                                           chunk['files']),
                    'NumberOfFiles': chunk['files'],
                    'NumberOfEvents': chunk['events'],
                    'NumberOfLumis': chunk['lumis'],
                    'Sites': sites,
                    'ACDC': acdcInfo}
        # only keep chunks that actually contain files
        if dbsBlock['NumberOfFiles']:
            fixedSizeBlocks.append(dbsBlock)
    return fixedSizeBlocks
def fixedSizeChunk(self, acdc, acdcInfo, task):
    """Return a set of blocks with a fixed number of ACDC records.

    Splits the ACDC fileset into chunks of at most ``chunkSize`` records
    and wraps each non-empty chunk into a DBS-block-like dict.
    """
    fixedSizeBlocks = []
    chunkSize = 250  # fixed number of ACDC records per chunk
    acdcBlocks = acdc.chunkFileset(
        acdcInfo['collection'],
        acdcInfo['fileset'],
        chunkSize,
        user=self.wmspec.getOwner().get("name"),
        group=self.wmspec.getOwner().get("group"))
    for block in acdcBlocks:
        dbsBlock = {}
        # synthesize a block name encoding workflow/fileset/offset/size
        dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                          acdcInfo["fileset"],
                                          block['offset'],
                                          block['files'])
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        if task.inputLocationFlag():
            # spec pins the site list; ignore ACDC locations
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        dbsBlock['ACDC'] = acdcInfo
        # skip empty chunks - they carry no work
        if dbsBlock['NumberOfFiles']:
            fixedSizeBlocks.append(dbsBlock)
    return fixedSizeBlocks
def getDatasetLocations(self, datasets):
    """Returns a dictionary with the location of the datasets according to DBS"""
    result = {}
    # datasets maps a DBS url to the dataset paths registered in it
    for dbsUrl, datasetPaths in datasets.items():
        dbs = self.dbs(dbsUrl)
        for datasetPath in datasetPaths:
            result[datasetPath] = sitesFromStorageEelements(
                dbs.listDatasetLocation(datasetPath))
    return result
def getDatasetLocations(self, datasets):
    """Returns a dictionary with the location of the datasets according to DBS.

    ``datasets`` maps a DBS url to an iterable of dataset paths; the result
    maps each dataset path to its list of hosting sites.
    """
    result = {}
    for dbsUrl in datasets:
        # one DBS reader per url
        dbs = self.dbs(dbsUrl)
        for datasetPath in datasets[dbsUrl]:
            locations = sitesFromStorageEelements(
                dbs.listDatasetLocation(datasetPath))
            result[datasetPath] = locations
    return result
def validBlocks(self, task):
    """Return blocks that pass the input data restriction according
    to the splitting algorithm.

    If ``self.data`` already names an ACDC block, return just that chunk;
    otherwise dispatch to the chunking function mapped from the spec's
    SplittingAlgo (falling back to ``self.defaultAlgo``).
    """
    validBlocks = []

    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        raise WorkQueueWMSpecError(
            self.wmspec, 'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
    else:
        # if self.data is not passed, assume the data is the input dataset
        # from the spec
        acdcBlockSplit = False

    if acdcBlockSplit:
        dbsBlock = {}
        dbsBlock['Name'] = self.data.keys()[0]
        block = acdc.getChunkInfo(
            acdcInfo['collection'],
            acdcBlockSplit['TaskName'],
            acdcBlockSplit['Offset'],
            acdcBlockSplit['NumOfFiles'],
            user=self.wmspec.getOwner().get("name"),
            group=self.wmspec.getOwner().get("group"))
        dbsBlock['NumberOfFiles'] = block['files']
        dbsBlock['NumberOfEvents'] = block['events']
        dbsBlock['NumberOfLumis'] = block['lumis']
        dbsBlock['ACDC'] = acdcInfo
        if task.inputLocationFlag():
            # spec pins the site list; ignore ACDC locations
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
        validBlocks.append(dbsBlock)
    else:
        if self.args['SplittingAlgo'] in self.unsupportedAlgos:
            raise WorkQueueWMSpecError(
                self.wmspec,
                'ACDC is not supported for %s' % self.args['SplittingAlgo'])
        # pick the chunking strategy for this splitting algorithm
        splittingFunc = self.defaultAlgo
        if self.args['SplittingAlgo'] in self.algoMapping:
            splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
        validBlocks = splittingFunc(acdc, acdcInfo, task)

    return validBlocks
def split(self):
    """Apply policy to spec"""
    dbs = self.dbs()
    for block in self.validBlocks(self.initialTask, dbs):
        # set the parent flag for processing only for clarity on the couch doc
        parentFlag = False
        parentList = {}
        # TODO this is slow process needs to change in DBS3
        if self.initialTask.parentProcessingFlag():
            parentFlag = True
            for parentBlock in dbs.listBlockParents(block["block"]):
                parentList[parentBlock["Name"]] = sitesFromStorageEelements(parentBlock['StorageElementList'])

        jobs = ceil(float(block[self.args['SliceType']]) /
                    float(self.args['SliceSize']))
        self.newQueueElement(
            Inputs={block['block']: self.data.get(block['block'], [])},
            ParentFlag=parentFlag,
            ParentData=parentList,
            Jobs=jobs)
def validBlocks(self, task):
    """Return blocks that pass the input data restriction according
    to the splitting algorithm"""
    acdcInfo = task.getInputACDC()
    if not acdcInfo:
        raise WorkQueueWMSpecError(self.wmspec,
                                   'No acdc section for %s' % task.getPathName())

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

    # if self.data is not passed, assume the data is the input dataset
    # from the spec
    acdcBlockSplit = False
    if self.data:
        acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])

    if acdcBlockSplit:
        # A single named ACDC block: fetch just that chunk.
        chunk = acdc.getChunkInfo(acdcInfo['collection'],
                                  acdcBlockSplit['TaskName'],
                                  acdcBlockSplit['Offset'],
                                  acdcBlockSplit['NumOfFiles'],
                                  user = self.wmspec.getOwner().get("name"),
                                  group = self.wmspec.getOwner().get("group"))
        dbsBlock = {'Name': self.data.keys()[0],
                    'NumberOfFiles': chunk['files'],
                    'NumberOfEvents': chunk['events'],
                    'NumberOfLumis': chunk['lumis'],
                    'ACDC': acdcInfo}
        if task.inputLocationFlag():
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = sitesFromStorageEelements(chunk["locations"])
        return [dbsBlock]

    if self.args['SplittingAlgo'] in self.unsupportedAlgos:
        raise WorkQueueWMSpecError(self.wmspec,
                                   'ACDC is not supported for %s' % self.args['SplittingAlgo'])

    # dispatch to the chunker registered for this algorithm, if any
    splittingFunc = self.algoMapping.get(self.args['SplittingAlgo'],
                                         self.defaultAlgo)
    return splittingFunc(acdc, acdcInfo, task)
def singleChunk(self, acdc, acdcInfo, task):
    """Return a single block (inside a list) with all associated ACDC records.

    The whole ACDC fileset is fetched as one chunk; an empty chunk yields
    an empty list.
    """
    result = []
    acdcBlock = acdc.singleChunkFileset(acdcInfo['collection'],
                                        acdcInfo['fileset'],
                                        user = self.wmspec.getOwner().get("name"),
                                        group = self.wmspec.getOwner().get("group"))
    dbsBlock = {}
    # synthesize a block name encoding workflow/fileset/offset/size
    dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                      acdcInfo["fileset"],
                                      acdcBlock['offset'],
                                      acdcBlock['files'])
    dbsBlock['NumberOfFiles'] = acdcBlock['files']
    dbsBlock['NumberOfEvents'] = acdcBlock['events']
    dbsBlock['NumberOfLumis'] = acdcBlock['lumis']
    if task.inputLocationFlag():
        # spec pins the site list; ignore ACDC locations
        dbsBlock["Sites"] = self.sites
    else:
        dbsBlock["Sites"] = sitesFromStorageEelements(acdcBlock["locations"])
    dbsBlock['ACDC'] = acdcInfo
    # skip empty chunks - they carry no work
    if dbsBlock['NumberOfFiles']:
        result.append(dbsBlock)
    return result
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Applies block and run white/black lists to every block of the task's
    input dataset, scales the block's file/event counts by the fraction of
    accepted lumi sections, and records (as a side effect) the sites that
    host ALL accepted blocks in ``self.data[datasetPath]``.
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of block locations

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            # FIX: the two counters were initialised twice in a row;
            # the redundant duplicate statements have been removed.
            acceptedLumiCount = 0
            fullLumiCount = 0
            for run in runLumis:
                if run in runs:
                    acceptedLumiCount += runLumis[run]
                fullLumiCount += runLumis[run]
            ratioAccepted = float(acceptedLumiCount) / fullLumiCount
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = int(
                float(block['NumberOfFiles']) * ratioAccepted)
            block['NumberOfEvents'] = int(
                float(block['NumberOfEvents']) * ratioAccepted)

        validBlocks.append(block)
        # narrow the common-location set with this block's locations
        if locations is None:
            locations = set(sitesFromStorageEelements(
                dbs.listFileBlockLocation(block['block'])))
        else:
            locations = locations.intersection(set(sitesFromStorageEelements(
                dbs.listFileBlockLocation(block['block']))))

    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Resolves the input block list (from ``self.data``, the block whitelist
    or the input dataset), filters it by block/run white- and black-lists,
    an optional lumi mask and the internal reject/blacklist bookkeeping,
    recomputes effective block sizes where filtering changed them, and
    stores each accepted block's site list in ``self.data``.
    Rejected block names are accumulated in ``self.rejectedWork``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if blockName in self.blockBlackListModifier:
            # Don't duplicate blocks rejected before or blocks that were
            # included and therefore are now in the blacklist
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            self.rejectedWork.append(blockName)
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file].getLumis()) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip
                # recalculating lumi counts
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        runNumber = lumiInfo['RunNumber']
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale events by lumi ratio
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))

        # TODO: need to decide what to do when location is no find.
        # There could be case for network problem (no connection to dbs, phedex)
        # or DBS se is not recorded (This will be retried anyway by location mapper)
        if not self.data[block['block']]:
            self.data[block['block']] = ["NoInitialSite"]
        #    # No sites for this block, move it to rejected
        #    self.rejectedWork.append(blockName)
        #    continue

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Gathers block info (from ``self.data`` or the whole input dataset),
    drops empty and black-listed blocks, applies run white/black lists
    with a ratio-based resize, fetches lumi counts when the slice type
    needs them, and stores each accepted block's site list in
    ``self.data``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        self.data = {datasetPath : []}  # same structure as in WorkQueueElement
        #blocks = dbs.getFileBlocksInfo(datasetPath, locations = False)
    #else:
        #dataItems = self.data.keys()

    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.extend(dbs.getFileBlocksInfo(datasetPath,
                                                blockName = str(data),
                                                locations = True))
        else:
            Lexicon.dataset(data)  # check dataset name
            blocks.extend(dbs.getFileBlocksInfo(datasetPath, locations = True))

    for block in blocks:
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles']:
            continue

        # check block restrictions
        if blockWhiteList and block['Name'] not in blockWhiteList:
            continue
        if block['Name'] in blockBlackList:
            continue

        # check run restrictions
        if runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            full_lumi_list = dbs.listRuns(block = block['Name'])
            runs = set(full_lumi_list)
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue

            # recalculate effective size of block
            # make a guess for new event/file numbers from ratio
            # of accepted lumi sections (otherwise have to pull file info)
            accepted_lumis = [x for x in full_lumi_list if x in runs]
            ratio_accepted = 1. * len(accepted_lumis) / len(full_lumi_list)
            block[self.lumiType] = len(accepted_lumis)
            block['NumberOfFiles'] *= ratio_accepted
            block['NumberOfEvents'] *= ratio_accepted

        # get lumi info if needed and not already available
        if self.args['SliceType'] == self.lumiType and not block.get(self.lumiType):
            blockSummary = dbs.getDBSSummaryInfo(block = block["Name"])
            block[self.lumiType] = blockSummary[self.lumiType]

        # save locations
        self.data[block['Name']] = sitesFromStorageEelements([x['Name'] for x in block['StorageElementList']])

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Resolves the input block list (from ``self.data``, the block whitelist
    or the input dataset), filters it by block/run white- and black-lists,
    an optional lumi mask and the internal reject/blacklist bookkeeping,
    recomputes effective block sizes where filtering changed them, and
    stores each accepted block's site list in ``self.data``.
    Rejected block names are accumulated in ``self.rejectedWork``.
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    for data in self.data:
        if data.find("#") > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split("#")[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if blockName in self.blockBlackListModifier:
            # Don't duplicate blocks rejected before or blocks that were
            # included and therefore are now in the blacklist
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            self.rejectedWork.append(blockName)
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block["NumberOfFiles"] or block["NumberOfFiles"] == "0":
            self.rejectedWork.append(blockName)
            continue

        # check lumi restrictions
        if task.getLumiMask():
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block["NumberOfFiles"] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1.0 * accepted_lumis / float(block["NumberOfLumis"])
            block["NumberOfEvents"] = float(block["NumberOfEvents"]) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block["block"])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                self.rejectedWork.append(blockName)
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip
                # recalculating lumi counts
                recalculateLumiCounts = False

            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block["block"])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry["LumiList"]:
                        runNumber = lumiInfo["RunNumber"]
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry["LumiList"]) != acceptedFileLumiCount:
                            # partially accepted file: scale events by lumi ratio
                            acceptedEventCount += (
                                float(acceptedFileLumiCount) *
                                fileEntry["NumberOfEvents"] /
                                len(fileEntry["LumiList"])
                            )
                        else:
                            acceptedEventCount += fileEntry["NumberOfEvents"]
                block[self.lumiType] = acceptedLumiCount
                block["NumberOfFiles"] = acceptedFileCount
                block["NumberOfEvents"] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block["block"]] = self.sites
        else:
            self.data[block["block"]] = sitesFromStorageEelements(dbs.listFileBlockLocation(block["block"]))

        # TODO: need to decide what to do when location is no find.
        # There could be case for network problem (no connection to dbs, phedex)
        # or DBS se is not recorded (This will be retried anyway by location mapper)
        if not self.data[block["block"]]:
            self.data[block["block"]] = ["NoInitialSite"]
        #    # No sites for this block, move it to rejected
        #    self.rejectedWork.append(blockName)
        #    continue

        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Candidate blocks come either from self.data (when already set) or are
    seeded from the task's block whitelist / input dataset.  Each candidate
    is filtered by the block white/black lists and, when present, by the
    task lumi mask or the run white/black lists; surviving blocks get their
    file/event/lumi counts rescaled accordingly.  Locations for each valid
    block are stored in self.data — taken from the site white/black lists
    when task.inputLocationFlag() is set, otherwise looked up in DBS.

    :param task: task object providing input dataset path, lumi mask and
        the block/run/site restriction lists
    :param dbs: DBS reader used for block, run, file and location lookups
    :returns: list of block summary dicts passing all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    validBlocks = []
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    if task.inputLocationFlag():
        # Then get the locations from the site whitelist/blacklist + SiteDB
        siteWhitelist = task.siteWhitelist()
        siteBlacklist = task.siteBlacklist()
        if siteWhitelist:
            # Just get the SEs matching the whitelists
            self.sites = siteWhitelist
        elif siteBlacklist:
            # Get all CMS sites less the blacklist
            allSites = cmsSiteNames()
            self.sites = list(set(allSites) - set(siteBlacklist))
        else:
            # Run at any CMS site
            self.sites = cmsSiteNames()

    blocks = []
    # Take data inputs or from spec
    if not self.data:
        if blockWhiteList:
            self.data = dict((block, []) for block in blockWhiteList)
        else:
            self.data = {datasetPath: []}  # same structure as in WorkQueueElement

    # Expand self.data entries into a flat list of block names: entries
    # containing '#' are already block names, anything else is a dataset
    # whose blocks are listed from DBS
    for data in self.data:
        if data.find('#') > -1:
            Lexicon.block(data)  # check block name
            datasetPath = str(data.split('#')[0])
            blocks.append(str(data))
        else:
            Lexicon.dataset(data)  # check dataset name
            for block in dbs.listFileBlocks(data):
                blocks.append(str(block))

    for blockName in blocks:
        # check block restrictions
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        # NOTE: the '0' string comparison suggests DBS may return counts as
        # strings here — confirm against the DBS reader implementation
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        # check lumi restrictions
        if task.getLumiMask():
            # total lumi sections in this block that survive the mask
            accepted_lumis = sum([len(maskedBlocks[blockName][file]) for file in maskedBlocks[blockName]])
            # use the information given from getMaskedBlocks to compute the size of the block
            block['NumberOfFiles'] = len(maskedBlocks[blockName])
            # ratio = lumis which are ok in the block / total num lumis
            ratioAccepted = 1. * accepted_lumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * ratioAccepted
            block[self.lumiType] = accepted_lumis
        # check run restrictions
        elif runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            recalculateLumiCounts = False
            if len(runs) > 1:
                # If more than one run in the block
                # Then we must calculate the lumi counts after filtering the run list
                # This has to be done rarely and requires calling DBS file information
                recalculateLumiCounts = True
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # If there is no change in the runs, then we can skip recalculating lumi counts
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block
                # We pull out file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        runNumber = lumiInfo['RunNumber']
                        if runNumber in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale its events by the
                            # fraction of accepted lumi sections
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

        # save locations
        if task.inputLocationFlag():
            self.data[block['block']] = self.sites
        else:
            self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))
        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction"""
    datasetPath = task.getInputDatasetPath()
    validBlocks = []

    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()

    # With a lumi mask, only the blocks overlapping the mask are relevant
    if task.getLumiMask():
        maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

    # Seed self.data from the spec when nothing was passed in
    if not self.data:
        if blockWhiteList:
            self.data = dict((whiteBlock, []) for whiteBlock in blockWhiteList)
        else:
            # same structure as in WorkQueueElement
            self.data = {datasetPath : []}

    # Expand the data inputs into a flat list of candidate block names:
    # entries containing '#' are block names already, anything else is a
    # dataset whose blocks are listed from DBS
    candidateBlocks = []
    for inputData in self.data:
        if '#' in inputData:
            Lexicon.block(inputData)  # check block name
            datasetPath = str(inputData.split('#')[0])
            candidateBlocks.append(str(inputData))
        else:
            Lexicon.dataset(inputData)  # check dataset name
            for dbsBlock in dbs.listFileBlocks(inputData):
                candidateBlocks.append(str(dbsBlock))

    for blockName in candidateBlocks:
        # apply block white/black lists
        if blockWhiteList and blockName not in blockWhiteList:
            continue
        if blockName in blockBlackList:
            continue
        if task.getLumiMask() and blockName not in maskedBlocks:
            continue

        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # blocks with 0 valid files should be ignored
        # - ideally they would be deleted but dbs can't delete blocks
        if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
            continue

        if task.getLumiMask():
            # estimate the effective block size from the masked-block info
            maskedFiles = maskedBlocks[blockName]
            acceptedLumis = sum(len(maskedFiles[maskedFile]) for maskedFile in maskedFiles)
            block['NumberOfFiles'] = len(maskedFiles)
            # fraction of the block's lumi sections that survive the mask
            lumiFraction = 1. * acceptedLumis / float(block['NumberOfLumis'])
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * lumiFraction
            block[self.lumiType] = acceptedLumis
        elif runWhiteList or runBlackList:
            # listRuns returns a run number per lumi section
            lumiRuns = dbs.listRuns(block = block['block'])
            survivingRuns = set(lumiRuns).difference(runBlackList)
            if runWhiteList:
                survivingRuns = survivingRuns.intersection(runWhiteList)
            # nothing left to run on -> ignore the block
            if not survivingRuns:
                continue
            # Guess new event/file counts from the ratio of accepted lumi
            # sections (avoids pulling per-file information from DBS)
            acceptedLumis = [lumi for lumi in lumiRuns if lumi in survivingRuns]
            lumiFraction = 1. * len(acceptedLumis) / len(lumiRuns)
            block[self.lumiType] = len(acceptedLumis)
            block['NumberOfFiles'] = float(block['NumberOfFiles']) * lumiFraction
            block['NumberOfEvents'] = float(block['NumberOfEvents']) * lumiFraction

        # record where the block is hosted
        self.data[block['block']] = sitesFromStorageEelements(dbs.listFileBlockLocation(block['block']))
        validBlocks.append(block)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Walks every block of the task's input dataset, applies the block and
    run white/black lists, rescales the block's lumi/file/event counts
    when run filtering removed data, and records in self.data the set of
    sites hosting *all* surviving blocks.

    :param task: task object providing the input dataset path and the
        block/run/site restriction lists
    :param dbs: DBS reader used to list blocks, summaries, runs, files
        and block locations
    :returns: list of block summary dicts that pass all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # intersection of the locations of all valid blocks
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    siteWhiteList = task.siteWhitelist()
    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue
        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block=block['block'])
            runs = set(runLumis.keys())
            # With more than one run in the block the lumi counts may need
            # recomputing after filtering the run list; that requires an
            # expensive DBS file-information call, so it is skipped below
            # when filtering removed nothing
            recalculateLumiCounts = len(runs) > 1
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # no runs were filtered out, block counts are already correct
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block:
                # pull out per-file info, since we don't do this often
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale its events by the
                            # fraction of accepted lumi sections
                            acceptedEventCount += float(acceptedFileLumiCount) * \
                                fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
            else:
                acceptedLumiCount = block["NumberOfLumis"]
                acceptedFileCount = block['NumberOfFiles']
                acceptedEventCount = block['NumberOfEvents']
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = acceptedFileCount
            block['NumberOfEvents'] = acceptedEventCount
        validBlocks.append(block)
        # accumulate the sites common to every valid block so far
        blockSites = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        if locations is None:
            locations = blockSites
        else:
            locations = locations.intersection(blockSites)
        if self.wmspec.locationDataSourceFlag():
            locations = locations.union(siteWhiteList)
    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks
def validBlocks(self, task, dbs):
    """Return blocks that pass the input data restriction.

    Iterates over the blocks of the task's input dataset, applies the
    block and run white/black lists, recalculates the block's
    lumi/file/event counts when run filtering removed data, and stores
    in self.data the sites that host every surviving block.

    :param task: task object providing the input dataset path and the
        block/run/site restriction lists
    :param dbs: DBS reader used to list blocks, summaries, runs, files
        and block locations
    :returns: list of block summary dicts that pass all restrictions
    """
    datasetPath = task.getInputDatasetPath()
    Lexicon.dataset(datasetPath)  # check dataset name
    validBlocks = []
    locations = None  # running intersection of valid-block locations
    blockWhiteList = task.inputBlockWhitelist()
    blockBlackList = task.inputBlockBlacklist()
    runWhiteList = task.inputRunWhitelist()
    runBlackList = task.inputRunBlacklist()
    siteWhiteList = task.siteWhitelist()
    for blockName in dbs.listFileBlocks(datasetPath):
        block = dbs.getDBSSummaryInfo(datasetPath, block = blockName)
        # check block restrictions
        if blockWhiteList and block['block'] not in blockWhiteList:
            continue
        if block['block'] in blockBlackList:
            continue
        # check run restrictions
        if runWhiteList or runBlackList:
            # listRunLumis returns a dictionary with the lumi sections per run
            runLumis = dbs.listRunLumis(block = block['block'])
            runs = set(runLumis.keys())
            # Recomputing lumi counts after run filtering needs an expensive
            # DBS file-information call; only consider it when the block has
            # more than one run, and skip it (below) when filtering removed
            # no runs at all
            recalculateLumiCounts = len(runs) > 1
            # apply blacklist
            runs = runs.difference(runBlackList)
            # if whitelist only accept listed runs
            if runWhiteList:
                runs = runs.intersection(runWhiteList)
            # any runs left are ones we will run on, if none ignore block
            if not runs:
                continue
            if len(runs) == len(runLumis):
                # run list unchanged: summary counts are already correct
                recalculateLumiCounts = False
            if recalculateLumiCounts:
                # Recalculate effective size of block from per-file info
                acceptedLumiCount = 0
                acceptedEventCount = 0
                acceptedFileCount = 0
                fileInfo = dbs.listFilesInBlock(fileBlockName = block['block'])
                for fileEntry in fileInfo:
                    acceptedFile = False
                    acceptedFileLumiCount = 0
                    for lumiInfo in fileEntry['LumiList']:
                        if lumiInfo['RunNumber'] in runs:
                            acceptedFile = True
                            acceptedFileLumiCount += 1
                    if acceptedFile:
                        acceptedFileCount += 1
                        acceptedLumiCount += acceptedFileLumiCount
                        if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                            # partially accepted file: scale events by the
                            # fraction of its lumi sections that survive
                            acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                        else:
                            acceptedEventCount += fileEntry['NumberOfEvents']
            else:
                acceptedLumiCount = block["NumberOfLumis"]
                acceptedFileCount = block['NumberOfFiles']
                acceptedEventCount = block['NumberOfEvents']
            block[self.lumiType] = acceptedLumiCount
            block['NumberOfFiles'] = acceptedFileCount
            block['NumberOfEvents'] = acceptedEventCount
        validBlocks.append(block)
        # intersect this block's sites with those of previous valid blocks
        blockSites = set(sitesFromStorageEelements(dbs.listFileBlockLocation(block['block'])))
        if locations is None:
            locations = blockSites
        else:
            locations = locations.intersection(blockSites)
        if self.wmspec.locationDataSourceFlag():
            locations = locations.union(siteWhiteList)
    # all needed blocks present at these sites
    if locations:
        self.data[datasetPath] = list(locations)
    return validBlocks