Example #1
def manageDatasetBlocks(datasetPath,
                        localDBS,
                        globalDBS,
                        phedexConfig=None,
                        phedexNodes=None):
    """
    _manageDatasetBlocks_

    Trawl through the dataset for all remaining open blocks, and then close them,
    migrate them to global and inject them into PhEDEx if phedexConfig is not None, using
    the optional list of PhEDEx nodes if provided.


    """
    dbs = DBSReader(localDBS)
    blocks = dbs.listFileBlocks(datasetPath)

    for block in blocks:
        if dbs.blockIsOpen(block):
            blockMgr = BlockManager(block, localDBS, globalDBS, datasetPath)
            blockMgr.closeBlock()
            blockMgr.migrateToGlobalDBS()
            if phedexConfig is not None:
                blockMgr.injectBlockToPhEDEx(phedexConfig, phedexNodes)

    return
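A minimal invocation sketch for manageDatasetBlocks above. The dataset path, DBS URLs, PhEDEx config path and node names are invented placeholders, not values from the original source; only the call signature comes from the example.

# Hypothetical usage: close, migrate and inject every remaining open block
# of one dataset. All string values below are placeholders.
localDBSUrl = "https://local-dbs.example.invalid/servlet/DBSServlet"    # placeholder URL
globalDBSUrl = "https://global-dbs.example.invalid/servlet/DBSServlet"  # placeholder URL

manageDatasetBlocks("/PrimaryDS/ProcessedDS/TIER",               # placeholder dataset path
                    localDBSUrl,
                    globalDBSUrl,
                    phedexConfig="/path/to/phedex-config.xml",   # assumed config file
                    phedexNodes=["T1_Example_Buffer"])           # assumed node names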
Example #2
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """
        
        
        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)
        
        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
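The method above fills three lookup structures from DBS: block to hosting sites, site to blocks, and block to LFNs. A self-contained sketch of the same bookkeeping with hard-coded stand-in data (all names invented) may make the shapes of those structures clearer:

# Illustrative sketch of the structures the loop above populates,
# with hard-coded stand-in data instead of DBSReader queries.
blockSites = {}    # block name -> list of sites hosting it
sites = {}         # site name  -> set of block names at that site
lfnsByBlock = {}   # block name -> list of LFNs (the example keeps this in self[block])

fakeBlocks = {
    "/Primary/Processed/TIER#block1": (["T2_XX_SiteA", "T2_XX_SiteB"],
                                       ["/store/mc/file1.root"]),
    "/Primary/Processed/TIER#block2": (["T2_XX_SiteA"],
                                       ["/store/mc/file2.root", "/store/mc/file3.root"]),
}

for block, (locations, lfns) in fakeBlocks.items():
    if locations:
        blockSites[block] = locations
    for location in locations:
        sites.setdefault(location, set()).add(block)
    lfnsByBlock[block] = lfns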
Example #3
    def __call__(self):
        """
        _operator()_

        Load PU dataset information from DBS

        """

        reader = DBSReader(self.dbsUrl)
        blocks = reader.listFileBlocks(self.dataset, False)

        for block in blocks:
            #  //
            # // Populate locations
            #//
            locations = reader.listFileBlockLocation(block)
            if locations:
                self.blockSites[block] = locations
            for location in locations:
                if not self.sites.has_key(location):
                    self.sites[location] = set()
                self.sites[location].add(block)
            #  //
            # // Populate File list for block
            #//
            self[block] = reader.lfnsInBlock(block)

        return
Example #4
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False, siteWhitelist=[], blockWhitelist=[], withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (blockName, locations)
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData["Files"]:
            totalEvents += fileInfo["NumberOfEvents"]
            fileList.add(fileInfo["LogicalFileName"])
            if withParents:
                parList = [x["LogicalFileName"] for x in fileInfo["ParentList"]]

                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"], parList)
            else:
                newBlock.addFile(fileInfo["LogicalFileName"], fileInfo["NumberOfEvents"])

        logging.debug("Block %s contains %s events in %s files" % (blockName, totalEvents, len(fileList)))

    return result
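A hedged usage sketch for createJobSplitter. The DBS URL, dataset path and site names are invented placeholders; only the parameter names come from the signature above.

# Hypothetical call: build a JobSplitter for one dataset, keeping only
# closed blocks that have a replica at the whitelisted sites.
splitter = createJobSplitter(
    "/PrimaryDS/ProcessedDS/TIER",                       # placeholder dataset path
    "https://dbs.example.invalid/servlet/DBSServlet",    # placeholder DBS URL
    onlyClosedBlocks=True,
    siteWhitelist=["T2_XX_SiteA", "T2_XX_SiteB"],        # assumed site names
    withParents=False)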
Example #5
    def loadSites(self, **dbsContacts):
        """
        Get the list of sites hosting the PU from DBS/DLS
                                                                                                              
        """
        dbsUrl = dbsContacts.get('DBSURL', None)
        if dbsUrl is None:
            dbsUrl = getLocalDBSURL()

        reader = DBSReader(dbsUrl)

        locations = []
        blocks = reader.listFileBlocks(self.dataset, True)

        for block in blocks:
            try:
                locations = reader.listFileBlockLocation(block)
            except Exception, ex:
                msg = "Unable to find DLS Locations for Block: %s\n" %  block
                msg += str(ex)
                logging.warning(msg)
                continue
Example #6
def manageDatasetBlocks(datasetPath, localDBS, globalDBS, phedexConfig = None, phedexNodes = None):
    """
    _manageDatasetBlocks_

    Trawl through the dataset for all remaining open blocks, and then close them,
    migrate them to global and inject them into PhEDEx if phedexConfig is not None, using
    the optional list of PhEDEx nodes if provided.


    """
    dbs = DBSReader(localDBS)
    blocks = dbs.listFileBlocks(datasetPath)

    for block in blocks:
        if dbs.blockIsOpen(block):
            blockMgr = BlockManager(block, localDBS, globalDBS, datasetPath)
            blockMgr.closeBlock()
            blockMgr.migrateToGlobalDBS()
            if phedexConfig is not None:
                blockMgr.injectBlockToPhEDEx(phedexConfig, phedexNodes)

    return
Example #7
    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)
        
        inputBlocks = reader.listFileBlocks(datasetPath)
        
        for block in blocks:
            #  //
            # // Test block exists at source
            #// 
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue
                
            try:
                xferData = reader.dbs.listDatasetContents(datasetPath,  block)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
            
            xferData = _remapBlockParentage(datasetPath, xferData)
            
            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath,)
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)
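A sketch of how migrateDatasetBlocks might be driven from a DBSWriter instance. The writer construction and all URLs, dataset and block names are assumptions for illustration; only the method's argument list is taken from the example.

# Hypothetical usage: migrate two named blocks of a dataset from a source
# DBS into the target DBS this writer is bound to. All values are placeholders.
writer = DBSWriter("https://target-dbs.example.invalid/servlet/DBSServlet")  # assumed constructor
writer.migrateDatasetBlocks(
    inputDBSUrl="https://source-dbs.example.invalid/servlet/DBSServlet",
    datasetPath="/PrimaryDS/ProcessedDS/TIER",
    blocks=["/PrimaryDS/ProcessedDS/TIER#block-uid-1",
            "/PrimaryDS/ProcessedDS/TIER#block-uid-2"])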
Example #8
    def makeBlockList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None):
        """
        _makeBlockList_


        Generate the list of blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(), onlyClosedBlocks)
        
        if self.persistData.blocks != []:
            remover = lambda x : x not in self.persistData.blocks
            newBlocks = filter(remover, dbsBlocks)
        else:
            newBlocks = dbsBlocks

        #  //
        # // Skipping blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        blocksAtSites = []
        for block in newBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "\nSkipping block: "
                msg += "No site info available for block %s " % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "\nSkipping block: "
                    msg += "Block %s has no replicas in %s" % (block,
                        ", ".join(sites))
                    logging.info(msg)
            else:
                blocksAtSites.append(block)
        newBlocks = blocksAtSites

        if len(newBlocks) == 0:
            msg = "No New Blocks found for dataset\n"
            raise RuntimeError, msg

        #  //
        # // Check presence of provided Blocks in newBlocks
        #//
        blocksToProcess = []
        if providedOnlyBlocks is not None :
            providedOnlyBlocksList = providedOnlyBlocks.split(',')
            msg = "OnlyBlocks setting provided. Processing it..."
            logging.info(msg)
            msg = "OnlyBlocks list contains %s Blocks." % (
                len(providedOnlyBlocksList))
            logging.info(msg)
            blockCount = 1
            for block in providedOnlyBlocksList :
                if block.strip() in newBlocks :
                    blocksToProcess.append(block.strip())
                    msg = "Block %s: Adding Block %s" % (
                        blockCount, block)
                    msg += " to the Whitelist"
                    logging.info(msg)
                else:
                    msg = "Block %s: Skipping Block %s " % (
                        blockCount, block)
                    msg += "It's no New or it has been processed"
                    msg += " already."
                    logging.info(msg)
                blockCount += 1
        else :
            blocksToProcess = newBlocks
            msg = "OnlyBlocks setting not provided. Processing"
            msg += " all New Blocks for Dataset\n"
            logging.info(msg)

        if len(blocksToProcess) == 0 :
            msg = "OnlyBlocks list does not match any New Blocks"
            msg += " found for Dataset\n"
            raise RuntimeError, msg
        
        blockList = str(blocksToProcess)
        blockList = blockList.replace("[", "")
        blockList = blockList.replace("]", "")
        blockList = blockList.replace("\'", "")
        blockList = blockList.replace("\"", "")
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.persistData.blocks.extend(blocksToProcess)
        return
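The four numbered steps in the docstring above amount to a set difference against the persistency file followed by an intersection with the OnlyBlocks list. A compact stand-alone sketch of that bookkeeping, with invented block names in place of DBS and persistency data:

# Step 1-2: new blocks are DBS blocks not yet recorded in the persistency file.
dbsBlocks = ["/DS#b1", "/DS#b2", "/DS#b3", "/DS#b4"]    # stand-in for reader.listFileBlocks(...)
processedBlocks = ["/DS#b1"]                            # stand-in for self.persistData.blocks
newBlocks = [b for b in dbsBlocks if b not in processedBlocks]

# Step 3-4: intersect with a user-provided OnlyBlocks string, if any,
# and store the result as a comma-separated workflow parameter.
providedOnlyBlocks = "/DS#b2, /DS#b4"
provided = [b.strip() for b in providedOnlyBlocks.split(",")]
blocksToProcess = [b for b in provided if b in newBlocks]
onlyBlocksParam = ", ".join(blocksToProcess)            # "/DS#b2, /DS#b4"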
Example #9
def createJobSplitter(dataset,
                      dbsUrl,
                      onlyClosedBlocks=False,
                      siteWhitelist=[],
                      blockWhitelist=[],
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.


    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (
                    blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)

            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s \n" % (
                    blockName,
                    locations,
                )
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)

        if withParents:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData['Files']:
            totalEvents += fileInfo['NumberOfEvents']
            fileList.add(fileInfo['LogicalFileName'])
            if withParents:
                parList = [
                    x['LogicalFileName'] for x in fileInfo['ParentList']
                ]

                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'], parList)
            else:
                newBlock.addFile(fileInfo['LogicalFileName'],
                                 fileInfo['NumberOfEvents'])

        logging.debug("Block %s contains %s events in %s files" % (
            blockName,
            totalEvents,
            len(fileList),
        ))

    return result
Example #10
    def makeFileList(self, onlyClosedBlocks = False, sites=None,
        providedOnlyBlocks=None, providedOnlyFiles=None):
        """
        _makeFileList_


        Generate the list of files and blocks for the workflow.

        1. Get the list of all blocks from the DBS
        2. Compare to list of blocks in persistency file
        3. Obtain the intersection of the new blocks and the providedOnlyBlocks
           list.
        4. Set OnlyBlocks parameter to intersection obtained.
        
        """
        #reader = DBSReader(self.dbsUrl)
        # At this point, blocks should be in local DBS
        localDBS = getLocalDBSURL()
        reader = DBSReader(localDBS)

        #  //
        # // Querying list of blocks from DBS
        #//
        msg = "Querying for closed blocks in Local DBS: %s ..." % localDBS
        logging.info(msg)
        dbsBlocks = reader.listFileBlocks(self.inputDataset(),
                                            onlyClosedBlocks)
        msg = "Retrieved %s close blocks from Local DBS" % len(dbsBlocks)
        logging.info(msg)

        #  //
        # // Constructing mapping structures block-file
        #//
        filesToBlocks = {}
        blocksToFiles = {}
        dbsFiles = reader.dbs.listFiles(path=self.inputDataset())
        for dbsfile in dbsFiles:
            if dbsfile['Block']['Name'] in dbsBlocks:
                filesToBlocks[dbsfile['LogicalFileName']] = \
                                                    dbsfile['Block']['Name']
                blocksToFiles.setdefault(dbsfile['Block']['Name'], []
                                         ).append(dbsfile['LogicalFileName'])

        # OnlyFiles?
        if providedOnlyFiles is not None and \
            providedOnlyFiles.strip().lower() != 'auto':
            msg = "Using OnlyFiles list:"
            msg += " %s files." % len(providedOnlyFiles.split(','))
            logging.info(msg)
            onlyFiles = [x.strip() for x in providedOnlyFiles.split(',') if x]
        # OnlyFiles=auto
        elif providedOnlyFiles is not None:
            msg = "Automatically generating OnlyFiles list from DBS..."
            logging.info(msg)
            onlyFiles = self.createOnlyFilesFromWorkflow()
        # OnlyBlocks
        elif providedOnlyBlocks is not None:
            msg = "Using OnlyBLocks list:"
            msg += " %s blocks." % len(providedOnlyBlocks.split(','))
            logging.info(msg)
            onlyFiles = []
            for block in \
                    [x.strip() for x in providedOnlyBlocks.split(',') if x]:
                onlyFiles.extend(blocksToFiles[block])
        # Processing everything in DBS
        else:
            msg = "Processing whole input dataset..."
            logging.info(msg)
            onlyFiles = []
            for block in dbsBlocks:
                onlyFiles.extend(blocksToFiles[block])

        if not onlyFiles:
            msg = "No files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files that were already processed
        #//
        if self.persistData.blocks:
            msg = "Filtering files that were already processed for this"
            msg += " workflow..."
            logging.info(msg)
            processedFiles = self.persistData.getFiles()
            msg = "Persistency file has %s file(s)" % len(processedFiles)
            logging.info(msg)
            remover  = lambda x: x not in processedFiles
            onlyFiles = filter(remover, onlyFiles)
            msg = "%s file(s) were removed" % \
                                    str(len(processedFiles) - len(onlyFiles))
            logging.info(msg)

        if not onlyFiles:
            msg = "No New files were found for the input dataset: " + \
                self.inputDataset()
            raise RuntimeError, msg

        #  //
        # // Filter files in blocks without site info
        #//
        msg = "Filtering blocks according to Site information..."
        logging.info(msg)
        candidateBlocks = {}
        for file in onlyFiles:
            candidateBlocks.setdefault(filesToBlocks[file], []).append(file)
        blocksAtSites = []
        for block in candidateBlocks:
            locations = reader.listFileBlockLocation(block)
            if not locations:
                msg = "Excluding block without site info ==> %s" % block
                logging.info(msg)
            elif sites is not None:
                locationInSites = False
                for location in locations:
                    if location in sites:
                        locationInSites = True
                        break
                if locationInSites:
                    blocksAtSites.append(block)
                else:
                    msg = "Excluding block without replicas"
                    msg += " in %s ==> %s" % (block, ", ".join(sites))
                    logging.info(msg)
            else:   
                blocksAtSites.append(block)
        if len(blocksAtSites) == 0:
            msg = "No block has site information."
            raise RuntimeError, msg

        #  //
        # // Constructing OnlyBlocks and OnlyFiles list
        #//
        onlyBlocks = {}
        for block in blocksAtSites:
            onlyBlocks[block] = candidateBlocks[block]
        onlyFiles = []
        for block in onlyBlocks:
            onlyFiles.extend(onlyBlocks[block])

        msg = "\n ==> Files to process: %s" % len(onlyFiles)
        msg += "\n ==> Blocks to process: %s" % len(onlyBlocks)
        logging.info(msg)
    
        blockList = ",".join(onlyBlocks.keys())
        fileList = ",".join(onlyFiles)
        self.workflow.parameters['OnlyBlocks'] = blockList
        self.workflow.parameters['OnlyFiles'] = fileList
        self.persistData.update(onlyBlocks)
        return
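A short stand-alone sketch of the file-to-block and block-to-files lookups the method above derives from the DBS file listing; the file records below are invented stand-ins for the output of reader.dbs.listFiles(...).

# Illustrative construction of the two mapping structures used above,
# with hard-coded stand-in records instead of a DBS query.
dbsFiles = [
    {"LogicalFileName": "/store/data/f1.root", "Block": {"Name": "/DS#b1"}},
    {"LogicalFileName": "/store/data/f2.root", "Block": {"Name": "/DS#b1"}},
    {"LogicalFileName": "/store/data/f3.root", "Block": {"Name": "/DS#b2"}},
]

filesToBlocks = {}
blocksToFiles = {}
for dbsfile in dbsFiles:
    blockName = dbsfile["Block"]["Name"]
    filesToBlocks[dbsfile["LogicalFileName"]] = blockName
    blocksToFiles.setdefault(blockName, []).append(dbsfile["LogicalFileName"])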
Example #11
    def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
        """
        _migrateDatasetBlocks_

        Migrate the list of fileblocks provided by blocks, belonging
        to the dataset specified by the dataset path to this DBS instance
        from the inputDBSUrl provided

        - *inputDBSUrl* : URL for connection to input DBS
        - *datasetPath* : Name of dataset in input DBS (must exist in input
                          DBS)
        - *blocks*      : list of block names to be migrated (must exist
                          in input DBS)

        """
        if len(blocks) == 0:
            msg = "FileBlocks not provided.\n"
            msg += "You must provide the name of at least one fileblock\n"
            msg += "to be migrated"
            raise DBSWriterError(msg)
        #  //
        # // Hook onto input DBSUrl and verify that the dataset & blocks
        #//  exist
        reader = DBSReader(inputDBSUrl)

        inputBlocks = reader.listFileBlocks(datasetPath)

        for block in blocks:
            #  //
            # // Test block exists at source
            #//
            if block not in inputBlocks:
                msg = "Block name:\n ==> %s\n" % block
                msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
                msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
                raise DBSWriterError(msg)

            #  //
            # // Test block does not exist in target
            #//
            if self.reader.blockExists(block):
                #  //
                # // block exists
                #//  If block is closed don't attempt transfer
                if not self.reader.blockIsOpen(block):
                    msg = "Block already exists in target DBS and is closed:\n"
                    msg += " ==> %s\n" % block
                    msg += "Skipping Migration of that block"
                    logging.warning(msg)
                    continue

            try:
                xferData = reader.dbs.listDatasetContents(datasetPath, block)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not read content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)

            xferData = _remapBlockParentage(datasetPath, xferData)

            try:
                self.dbs.insertDatasetContents(xferData)
            except DbsException, ex:
                msg = "Error in DBSWriter.migrateDatasetBlocks\n"
                msg += "Could not write content of dataset:\n ==> %s\n" % (
                    datasetPath, )
                msg += "Block name:\n ==> %s\n" % block
                msg += "%s\n" % formatEx(ex)
                raise DBSWriterError(msg)