Example #1
0
    def _getDBSBlock(self, match, wmspec):
        """Get the file information for the input block of a matched element.

        The first key of ``match['Inputs']`` is taken as the block name.  When
        ``match['ACDC']`` is truthy the files come from the ACDC server
        (``DataCollectionService.getChunkFiles``), using the offset and number
        of files that ``ACDCBlock.splitBlockName`` extracts from the block
        name.  Otherwise the block is fetched from the DBS client returned by
        ``get_dbs(match['Dbs'])``, with parentage when the task's
        ``parentProcessingFlag()`` is true.

        :param match: mapping providing ``Inputs`` and ``ACDC``, plus ``Dbs``
            and ``TaskName`` on the DBS path.
        :param wmspec: workload spec providing ``getOwner()`` and ``getTask()``.
        :returns: ``(blockName, blockInfo)``; on the ACDC path ``blockInfo`` is
            ``{"Files": <result of getChunkFiles>}``, on the DBS path it is
            the ``blockName`` entry of the DBS result.
        :raises IndexError: if ``match['Inputs']`` is empty.
        """
        # TODO: Allow more than one
        # list(...)[0] rather than .keys()[0]: dict views cannot be indexed on
        # Python 3, and this form still raises IndexError on an empty mapping.
        blockName = list(match['Inputs'])[0]

        acdcInfo = match['ACDC']
        if acdcInfo:
            acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
            # The returned collection is not used below; the call itself is
            # kept because it may serve as an existence check on the server
            # -- TODO confirm and drop if it is a plain lookup.
            acdc.getDataCollection(acdcInfo['collection'])
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            owner = wmspec.getOwner()
            fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           splitedBlockName['Offset'],
                                           splitedBlockName['NumOfFiles'],
                                           user=owner.get("name"),
                                           group=owner.get("group"))
            return blockName, {"Files": fileLists}

        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
        return blockName, dbsBlockDict[blockName]
Example #2
0
    def _getDBSBlock(self, match, wmspec):
        """Get the file information for the input block of a matched element.

        The first key of ``match['Inputs']`` is taken as the block name.  When
        ``match['ACDC']`` is truthy the files come from the ACDC server
        (``DataCollectionService.getChunkFiles``).  Otherwise the block is
        fetched from the DBS client returned by ``get_dbs(match['Dbs'])``,
        with parentage when the task's ``parentProcessingFlag()`` is true.  On
        the DBS path, if ``wmspec.locationDataSourceFlag()`` is true, the
        block's ``StorageElements`` entry is replaced by the de-duplicated
        result of ``self.SiteDB.cmsNametoSE`` for each site listed for the
        block in ``match['Inputs']``.

        :param match: mapping providing ``Inputs`` and ``ACDC``, plus ``Dbs``
            and ``TaskName`` on the DBS path.
        :param wmspec: workload spec providing ``getOwner()``, ``getTask()``
            and ``locationDataSourceFlag()``.
        :returns: ``(blockName, blockInfo)``; on the ACDC path ``blockInfo`` is
            ``{"Files": <result of getChunkFiles>}``.
        :raises IndexError: if ``match['Inputs']`` is empty.
        """
        inputs = match['Inputs']
        # TODO: Allow more than one
        # list(...)[0] rather than .keys()[0]: dict views cannot be indexed on
        # Python 3, and this form still raises IndexError on an empty mapping.
        blockName = list(inputs)[0]

        acdcInfo = match['ACDC']
        if acdcInfo:
            acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
            # The returned collection is not used below; the call itself is
            # kept because it may serve as an existence check on the server
            # -- TODO confirm and drop if it is a plain lookup.
            acdc.getDataCollection(acdcInfo['collection'])
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            owner = wmspec.getOwner()
            fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           splitedBlockName['Offset'],
                                           splitedBlockName['NumOfFiles'],
                                           user=owner.get("name"),
                                           group=owner.get("group"))
            return blockName, {"Files": fileLists}

        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)

        if wmspec.locationDataSourceFlag():
            blockInfo = dbsBlockDict[blockName]
            seElements = []
            # inputs[blockName] is the value paired with the first key, i.e.
            # what .values()[0] returned on Python 2.
            # TODO: Allow more than one
            for cmsSite in inputs[blockName]:
                seElements.extend(self.SiteDB.cmsNametoSE(cmsSite))
            # Drop duplicates: several sites can yield the same entry.
            blockInfo['StorageElements'] = list(set(seElements))
        return blockName, dbsBlockDict[blockName]
Example #3
0
    def validBlocks(self, task):
        """Return the ACDC blocks to be processed for ``task``.

        If ``self.data`` is set, its first key is parsed with
        ``ACDCBlock.splitBlockName`` and, when that yields a truthy result, a
        single block describing that chunk (via ``getChunkInfo``) is returned.
        Otherwise the whole fileset named in the task's ACDC section is cut
        into chunks of ``chunkSize`` files (via ``chunkFileset``) and one
        block per chunk is returned.

        :param task: task helper providing ``getInputACDC()`` and
            ``getPathName()``.
        :returns: list of dicts with the keys ``Name``, ``NumberOfFiles``,
            ``NumberOfEvents``, ``NumberOfLumis``, ``Sites`` and ``ACDC``.
        :raises WorkQueueWMSpecError: if the task has no ACDC section.
        """
        validBlocks = []
        # TODO take the chunk size from parameter
        chunkSize = 200

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            # next(iter(...)) rather than .keys()[0]: dict views cannot be
            # indexed on Python 3.  self.data is non-empty on this branch.
            blockName = next(iter(self.data))
            acdcBlockSplit = ACDCBlock.splitBlockName(blockName)
        else:
            # if self.data is not passed, assume the data is the input dataset
            # from the spec
            blockName = None
            acdcBlockSplit = False

        owner = self.wmspec.getOwner()
        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = blockName
            block = acdc.getChunkInfo(
                acdcInfo['collection'],
                acdcBlockSplit['TaskName'],
                acdcBlockSplit['Offset'],
                acdcBlockSplit['NumOfFiles'],
                user=owner.get("name"),
                group=owner.get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            acdcBlocks = acdc.chunkFileset(
                acdcInfo['collection'],
                acdcInfo['fileset'],
                chunkSize,
                user=owner.get("name"),
                group=owner.get("group"))
            for block in acdcBlocks:
                dbsBlock = {}
                # The generated name encodes the chunk's offset and file
                # count together with the workflow and fileset names.
                dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                                  acdcInfo["fileset"],
                                                  block['offset'],
                                                  block['files'])
                dbsBlock['NumberOfFiles'] = block['files']
                dbsBlock['NumberOfEvents'] = block['events']
                dbsBlock['NumberOfLumis'] = block['lumis']
                dbsBlock["Sites"] = sitesFromStorageEelements(
                    block["locations"])
                dbsBlock['ACDC'] = acdcInfo
                validBlocks.append(dbsBlock)

        return validBlocks
Example #4
0
    def validBlocks(self, task):
        """Return the ACDC blocks to be processed for ``task``.

        If ``self.data`` is set, its first key is parsed with
        ``ACDCBlock.splitBlockName`` and, when that yields a truthy result, a
        single block describing that chunk (via ``getChunkInfo``) is returned.
        Otherwise the whole fileset named in the task's ACDC section is cut
        into chunks of ``chunkSize`` files (via ``chunkFileset``) and one
        block per chunk is returned.

        :param task: task helper providing ``getInputACDC()`` and
            ``getPathName()``.
        :returns: list of dicts with the keys ``Name``, ``NumberOfFiles``,
            ``NumberOfEvents``, ``NumberOfLumis``, ``Sites`` and ``ACDC``.
        :raises WorkQueueWMSpecError: if the task has no ACDC section.
        """
        validBlocks = []
        # TODO take the chunk size from parameter
        chunkSize = 200

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            # next(iter(...)) rather than .keys()[0]: dict views cannot be
            # indexed on Python 3.  self.data is non-empty on this branch.
            blockName = next(iter(self.data))
            acdcBlockSplit = ACDCBlock.splitBlockName(blockName)
        else:
            # if self.data is not passed, assume the data is the input dataset
            # from the spec
            blockName = None
            acdcBlockSplit = False

        owner = self.wmspec.getOwner()
        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = blockName
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'],
                                      user=owner.get("name"),
                                      group=owner.get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            acdcBlocks = acdc.chunkFileset(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           chunkSize,
                                           user=owner.get("name"),
                                           group=owner.get("group"))
            for block in acdcBlocks:
                dbsBlock = {}
                # The generated name encodes the chunk's offset and file
                # count together with the workflow and fileset names.
                dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                                  acdcInfo["fileset"],
                                                  block['offset'], block['files'])
                dbsBlock['NumberOfFiles'] = block['files']
                dbsBlock['NumberOfEvents'] = block['events']
                dbsBlock['NumberOfLumis'] = block['lumis']
                dbsBlock["Sites"] = sitesFromStorageEelements(block["locations"])
                dbsBlock['ACDC'] = acdcInfo
                validBlocks.append(dbsBlock)

        return validBlocks
Example #5
0
    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm.

        If ``self.data`` is set and its first key parses (through
        ``ACDCBlock.splitBlockName``) to a truthy value, a single block built
        from ``getChunkInfo`` is returned.  Otherwise the work is delegated to
        the splitting function selected by ``self.args['SplittingAlgo']``
        (``self.algoMapping``, falling back to ``self.defaultAlgo``).

        :param task: task helper providing ``getInputACDC()``,
            ``getPathName()`` and ``getTrustSitelists()``.
        :returns: list with one block dict on the single-chunk path, otherwise
            whatever the selected splitting function returns.
        :raises WorkQueueWMSpecError: if the task has no ACDC section, or if
            the splitting algorithm is listed in ``self.unsupportedAlgos``.
        """
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            # next(iter(...)) rather than .keys()[0]: dict views cannot be
            # indexed on Python 3.  self.data is non-empty on this branch.
            blockName = next(iter(self.data))
            acdcBlockSplit = ACDCBlock.splitBlockName(blockName)
        else:
            # if self.data is not passed, assume the data is the input dataset
            # from the spec
            blockName = None
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = blockName
            owner = self.wmspec.getOwner()
            block = acdc.getChunkInfo(
                acdcInfo['collection'],
                acdcBlockSplit['TaskName'],
                acdcBlockSplit['Offset'],
                acdcBlockSplit['NumOfFiles'],
                user=owner.get("name"),
                group=owner.get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            # NOTE(review): the result of getTrustSitelists() is used directly
            # as a boolean; confirm it is a flag and not a container that is
            # always truthy.
            if task.getTrustSitelists():
                dbsBlock["Sites"] = self.sites
            else:
                # TODO remove this line when all DBS origin_site_name is converted to PNN
                block["locations"] = self.siteDB.checkAndConvertSENameToPNN(
                    block["locations"])
                # upto this
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(
                    self.wmspec, 'ACDC is not supported for %s' %
                    self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks
Example #6
0
def main():
    """Debug helper: fetch one ACDC chunk and report files holding given lumis.

    Uses a hard-coded block name and ACDC description, retrieves the chunk's
    files from the ACDC server, prints every file/run whose lumi keys
    intersect ``wantedLumis``, prints the elapsed time and then exits with
    status 1.  The WMBSHelper section after the exit is therefore never run.
    """
    startTime = time.time()
    # Stands in for: match['Inputs'].keys()[0]
    blockName = "/acdc/vlimant_ACDC0_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_190218_145226_481/:pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222:SUS-RunIIFall18wmLHEGS-00025_0/0/31055"

    # Stands in for: match['ACDC']
    acdcInfo = {
        "database": "acdcserver",
        "fileset": "/pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222/SUS-RunIIFall18wmLHEGS-00025_0",
        "collection": "pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222",
        "server": "https://cmsweb.cern.ch/couchdb",
    }

    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    splitedBlockName = ACDCBlock.splitBlockName(blockName)
    print("Splitted block name: %s" % splitedBlockName)

    fileLists = acdc.getChunkFiles(
        acdcInfo['collection'],
        acdcInfo['fileset'],
        splitedBlockName['Offset'],
        splitedBlockName['NumOfFiles'])
    print("Retrieved %d unique files from the ACDCServer" % len(fileLists))

    block = {"Files": fileLists}

    wantedLumis = {252052, 240646}
    for fileInfo in fileLists:
        for run in fileInfo['runs']:
            # NOTE(review): wantedLumis holds ints; confirm the 'Lumis' keys
            # returned by run.json() are ints too, otherwise nothing matches.
            lumisFound = wantedLumis.intersection(set(run.json()['Lumis'].keys()))
            if lumisFound:
                print("File: %s with events: %s, contains these lumis: %s" % (fileInfo['lfn'], fileInfo['events'], lumisFound))

    # To dump the chunk to disk instead:
    # with open("chunkfiles.json", 'w') as fo:
    #     json.dump(block, fo)

    elapsed = time.time() - startTime
    print("Spent %s secs running so far" % elapsed)
    sys.exit(1)

    ### Now doing the WMBSHelper stuff (unreachable while the exit above stays)
    reqUrl = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    requestName = "vlimant_ACDC0_task_HIG-RunIIFall17wmLHEGS-01122__v1_T_180808_130708_5376"

    wmspec = WMWorkloadHelper()
    wmspec.loadSpecFromCouch(reqUrl, requestName)
    taskName = "HIG-RunIIFall17DRPremix-00788_0"
    mask = None
    cacheDir = "/data/srv/wmagent/v1.1.14.patch6/install/wmagent/WorkQueueManager/cache"
    # Stands in for:
    # WMBSHelper(wmspec, match['TaskName'], blockName, mask, self.params['CacheDir'])
    wmbsHelper = WMBSHelper(wmspec, taskName, blockName, mask, cacheDir)
    sub, numFilesAdded = wmbsHelper.createSubscriptionAndAddFiles(block=block)
Example #7
0
    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm.

        If ``self.data`` is set and its first key parses (through
        ``ACDCBlock.splitBlockName``) to a truthy value, a single block built
        from ``getChunkInfo`` is returned.  Otherwise the work is delegated to
        the splitting function selected by ``self.args['SplittingAlgo']``
        (``self.algoMapping``, falling back to ``self.defaultAlgo``).

        :param task: task helper providing ``getInputACDC()``,
            ``getPathName()`` and ``inputLocationFlag()``.
        :returns: list with one block dict on the single-chunk path, otherwise
            whatever the selected splitting function returns.
        :raises WorkQueueWMSpecError: if the task has no ACDC section, or if
            the splitting algorithm is listed in ``self.unsupportedAlgos``.
        """
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            # next(iter(...)) rather than .keys()[0]: dict views cannot be
            # indexed on Python 3.  self.data is non-empty on this branch.
            blockName = next(iter(self.data))
            acdcBlockSplit = ACDCBlock.splitBlockName(blockName)
        else:
            # if self.data is not passed, assume the data is the input dataset
            # from the spec
            blockName = None
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = blockName
            owner = self.wmspec.getOwner()
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'],
                                      user=owner.get("name"),
                                      group=owner.get("group"))
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.inputLocationFlag():
                # The flag selects self.sites over the chunk's own locations.
                dbsBlock["Sites"] = self.sites
            else:
                # TODO remove this line when all DBS origin_site_name is converted to PNN
                block["locations"] = self.siteDB.checkAndConvertSENameToPNN(block["locations"])
                # upto this
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec, 'ACDC is not supported for %s' % self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks
Example #8
0
    def validBlocks(self, task):
        """Build the list of ACDC blocks for ``task``.

        With ``self.data`` set and its first key parsing (through
        ``ACDCBlock.splitBlockName``) to a truthy value, one block is built
        from ``getChunkInfo``.  In every other case the splitting function
        chosen by ``self.args['SplittingAlgo']`` produces the result.

        Raises WorkQueueWMSpecError when the task has no ACDC section or the
        splitting algorithm is in ``self.unsupportedAlgos``.
        """
        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])

        # Without self.data the input dataset from the spec is assumed.
        chunkKeys = False
        if self.data:
            chunkKeys = ACDCBlock.splitBlockName(next(iter(self.data)))

        if not chunkKeys:
            algoName = self.args['SplittingAlgo']
            if algoName in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec, 'ACDC is not supported for %s' % algoName)
            splitter = self.defaultAlgo
            if algoName in self.algoMapping:
                splitter = self.algoMapping[algoName]
            return splitter(acdc, acdcInfo, task)

        firstName = next(iter(self.data))
        chunk = acdc.getChunkInfo(acdcInfo['collection'],
                                  chunkKeys['TaskName'],
                                  chunkKeys['Offset'],
                                  chunkKeys['NumOfFiles'])
        entry = {
            'Name': firstName,
            'NumberOfFiles': chunk['files'],
            'NumberOfEvents': chunk['events'],
            'NumberOfLumis': chunk['lumis'],
            'ACDC': acdcInfo,
        }
        # Use self.sites when the 'trustlists' entry is set, otherwise map
        # the chunk's own locations through PNNstoPSNs.
        if task.getTrustSitelists().get('trustlists'):
            entry["Sites"] = self.sites
        else:
            entry["Sites"] = self.cric.PNNstoPSNs(chunk["locations"])
        return [entry]
Example #9
0
    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm.

        If ``self.data`` is set and its first key parses (through
        ``ACDCBlock.splitBlockName``) to a truthy value, a single block built
        from ``getChunkInfo`` is returned.  Otherwise the work is delegated to
        the splitting function selected by ``self.args['SplittingAlgo']``
        (``self.algoMapping``, falling back to ``self.defaultAlgo``).

        :param task: task helper providing ``getInputACDC()``,
            ``getPathName()`` and ``getTrustSitelists()``.
        :returns: list with one block dict on the single-chunk path, otherwise
            whatever the selected splitting function returns.
        :raises WorkQueueWMSpecError: if the task has no ACDC section, or if
            the splitting algorithm is listed in ``self.unsupportedAlgos``.
        """
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            # next(iter(...)) rather than .keys()[0]: dict views cannot be
            # indexed on Python 3.  self.data is non-empty on this branch.
            blockName = next(iter(self.data))
            acdcBlockSplit = ACDCBlock.splitBlockName(blockName)
        else:
            # if self.data is not passed, assume the data is the input dataset
            # from the spec
            blockName = None
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = blockName
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            # Use self.sites when the 'trustlists' entry is set, otherwise map
            # the chunk's own locations through PNNstoPSNs.
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(self.wmspec, 'ACDC is not supported for %s' % self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks
Example #10
0
    def _getDBSBlock(self, match, wmspec):
        """Get the file information for the input block of a matched element.

        The first key of ``match['Inputs']`` is taken as the block name.  When
        ``match['ACDC']`` is truthy the files come from the ACDC server
        (``DataCollectionService.getChunkFiles``).  Otherwise the block is
        fetched from the DBS client returned by ``get_dbs(match['Dbs'])``,
        with parentage when the task's ``parentProcessingFlag()`` is true.  On
        the DBS path, if ``wmspec.locationDataSourceFlag()`` is true, the
        block's ``StorageElements`` entry is replaced by the de-duplicated
        result of ``self.SiteDB.cmsNametoSE`` for each site listed for the
        block in ``match['Inputs']``.

        :param match: mapping providing ``Inputs`` and ``ACDC``, plus ``Dbs``
            and ``TaskName`` on the DBS path.
        :param wmspec: workload spec providing ``getOwner()``, ``getTask()``
            and ``locationDataSourceFlag()``.
        :returns: ``(blockName, blockInfo)``; on the ACDC path ``blockInfo`` is
            ``{"Files": <result of getChunkFiles>}``.
        :raises IndexError: if ``match['Inputs']`` is empty.
        """
        inputs = match['Inputs']
        # TODO: Allow more than one
        # list(...)[0] rather than .keys()[0]: dict views cannot be indexed on
        # Python 3, and this form still raises IndexError on an empty mapping.
        blockName = list(inputs)[0]

        acdcInfo = match['ACDC']
        if acdcInfo:
            acdc = DataCollectionService(acdcInfo["server"],
                                         acdcInfo["database"])
            # The returned collection is not used below; the call itself is
            # kept because it may serve as an existence check on the server
            # -- TODO confirm and drop if it is a plain lookup.
            acdc.getDataCollection(acdcInfo['collection'])
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            owner = wmspec.getOwner()
            fileLists = acdc.getChunkFiles(
                acdcInfo['collection'],
                acdcInfo['fileset'],
                splitedBlockName['Offset'],
                splitedBlockName['NumOfFiles'],
                user=owner.get("name"),
                group=owner.get("group"))
            return blockName, {"Files": fileLists}

        dbs = get_dbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)

        if wmspec.locationDataSourceFlag():
            blockInfo = dbsBlockDict[blockName]
            seElements = []
            # inputs[blockName] is the value paired with the first key, i.e.
            # what .values()[0] returned on Python 2.
            # TODO: Allow more than one
            for cmsSite in inputs[blockName]:
                seElements.extend(self.SiteDB.cmsNametoSE(cmsSite))
            # Drop duplicates: several sites can yield the same entry.
            blockInfo['StorageElements'] = list(set(seElements))
        return blockName, dbsBlockDict[blockName]