def manageDatasetBlocks(datasetPath, localDBS, globalDBS,
                        phedexConfig=None, phedexNodes=None):
    """
    _manageDatasetBlocks_

    Trawl through the dataset for all remaining open blocks, then close
    them, migrate them to the global DBS and inject them into PhEDEx if
    phedexConfig is not None, using the optional list of PhEDEx nodes
    if provided.

    """
    dbs = DBSReader(localDBS)
    blocks = dbs.listFileBlocks(datasetPath)

    for block in blocks:
        if dbs.blockIsOpen(block):
            blockMgr = BlockManager(block, localDBS, globalDBS, datasetPath)
            blockMgr.closeBlock()
            blockMgr.migrateToGlobalDBS()
            if phedexConfig is not None:
                blockMgr.injectBlockToPhEDEx(phedexConfig, phedexNodes)
    return
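# Usage sketch (hypothetical values): the dataset path, DBS URLs, PhEDEx
# config path and node name below are illustrative placeholders only.
def _exampleManageDatasetBlocks():
    manageDatasetBlocks(
        "/Primary/Processed/TIER",
        localDBS="http://localhost:8080/DBS/servlet/DBSServlet",
        globalDBS="http://globaldbs.example.com/DBS/servlet/DBSServlet",
        phedexConfig="/path/to/phedex.cfg",  # omit to skip PhEDEx injection
        phedexNodes=["T1_Example_Buffer"])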
def __call__(self):
    """
    _operator()_

    Load PU dataset information from DBS

    """
    reader = DBSReader(self.dbsUrl)
    blocks = reader.listFileBlocks(self.dataset, False)

    for block in blocks:
        #  //
        # // Populate locations
        #//
        locations = reader.listFileBlockLocation(block)
        if locations:
            self.blockSites[block] = locations
            for location in locations:
                if location not in self.sites:
                    self.sites[location] = set()
                self.sites[location].add(block)

        #  //
        # // Populate File list for block
        #//
        self[block] = reader.lfnsInBlock(block)
    return
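# Sketch of how the populated structures can be used; `pu` is assumed to
# be an instance of the class defining __call__ above. After the call,
# blockSites maps block -> list of locations, sites maps location ->
# set of blocks, and the mapping itself maps block -> list of LFNs.
def _examplePileupLookup(pu):
    pu()  # trigger the DBS queries above
    for location, blockSet in pu.sites.items():
        logging.info("%s hosts %s block(s)", location, len(blockSet))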
def createJobSplitter(dataset, dbsUrl, onlyClosedBlocks=False,
                      siteWhitelist=[], blockWhitelist=[],
                      withParents=False):
    """
    _createJobSplitter_

    Instantiate a JobSplitter instance for the dataset provided
    and populate it with details from DBS.

    """
    reader = DBSReader(dbsUrl)
    result = JobSplitter(dataset)
    filterSites = len(siteWhitelist) > 0
    filterBlocks = len(blockWhitelist) > 0

    for blockName in reader.listFileBlocks(dataset, onlyClosedBlocks):
        locations = reader.listFileBlockLocation(blockName)
        if filterBlocks:
            if blockName not in blockWhitelist:
                msg = "Excluding block %s based on block whitelist: %s\n" % (
                    blockName, blockWhitelist)
                logging.debug(msg)
                continue

        if filterSites:
            siteMatches = filter(lambda x: x in locations, siteWhitelist)
            if len(siteMatches) == 0:
                msg = "Excluding block %s based on sites: %s\n" % (
                    blockName, locations)
                logging.debug(msg)
                continue
            else:
                locations = siteMatches

        newBlock = result.newFileblock(blockName, *locations)
        if withParents:
            blockData = reader.getFileBlockWithParents(blockName)[blockName]
        else:
            blockData = reader.getFileBlock(blockName)[blockName]

        totalEvents = 0
        fileList = set()
        for fileInfo in blockData["Files"]:
            totalEvents += fileInfo["NumberOfEvents"]
            fileList.add(fileInfo["LogicalFileName"])
            if withParents:
                parList = [x["LogicalFileName"]
                           for x in fileInfo["ParentList"]]
                newBlock.addFile(fileInfo["LogicalFileName"],
                                 fileInfo["NumberOfEvents"],
                                 parList)
            else:
                newBlock.addFile(fileInfo["LogicalFileName"],
                                 fileInfo["NumberOfEvents"])

        logging.debug("Block %s contains %s events in %s files" % (
            blockName, totalEvents, len(fileList)))

    return result
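# Usage sketch (hypothetical dataset path, DBS URL and site names): build
# a splitter over closed blocks only, restricted to blocks replicated at
# two sites.
def _exampleCreateJobSplitter():
    splitter = createJobSplitter(
        "/Primary/Processed/TIER",
        "http://localhost:8080/DBS/servlet/DBSServlet",
        onlyClosedBlocks=True,
        siteWhitelist=["T2_Example_A", "T2_Example_B"])
    return splitter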
def loadSites(self, **dbsContacts):
    """
    Get the list of sites hosting the PU from DBS/DLS

    """
    dbsUrl = dbsContacts.get('DBSURL', None)
    if dbsUrl is None:
        dbsUrl = getLocalDBSURL()

    reader = DBSReader(dbsUrl)

    locations = []
    blocks = reader.listFileBlocks(self.dataset, True)

    for block in blocks:
        try:
            locations = reader.listFileBlockLocation(block)
        except Exception, ex:
            msg = "Unable to find DLS Locations for Block: %s\n" % block
            msg += str(ex)
            logging.warning(msg)
            continue
def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks):
    """
    _migrateDatasetBlocks_

    Migrate the list of fileblocks provided by blocks, belonging
    to the dataset specified by the dataset path to this DBS instance
    from the inputDBSUrl provided

    - *inputDBSUrl* : URL for connection to input DBS
    - *datasetPath* : Name of dataset in input DBS (must exist in
                      input DBS)
    - *blocks* : list of block names to be migrated (must exist in
                 input DBS)

    """
    if len(blocks) == 0:
        msg = "FileBlocks not provided.\n"
        msg += "You must provide the name of at least one fileblock\n"
        msg += "to be migrated"
        raise DBSWriterError(msg)

    #  //
    # // Hook onto input DBSUrl and verify that the dataset & blocks
    #//  exist
    reader = DBSReader(inputDBSUrl)
    inputBlocks = reader.listFileBlocks(datasetPath)

    for block in blocks:
        #  //
        # // Test block exists at source
        #//
        if block not in inputBlocks:
            msg = "Block name:\n ==> %s\n" % block
            msg += "Not found in input dataset:\n ==> %s\n" % datasetPath
            msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl
            raise DBSWriterError(msg)

        #  //
        # // Test block does not exist in target
        #//
        if self.reader.blockExists(block):
            #  //
            # // Block exists; if it is closed, do not attempt transfer
            #//
            if not self.reader.blockIsOpen(block):
                msg = "Block already exists in target DBS and is closed:\n"
                msg += " ==> %s\n" % block
                msg += "Skipping Migration of that block"
                logging.warning(msg)
                continue

        try:
            xferData = reader.dbs.listDatasetContents(datasetPath, block)
        except DbsException, ex:
            msg = "Error in DBSWriter.migrateDatasetBlocks\n"
            msg += "Could not read content of dataset:\n ==> %s\n" % (
                datasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)

        xferData = _remapBlockParentage(datasetPath, xferData)

        try:
            self.dbs.insertDatasetContents(xferData)
        except DbsException, ex:
            msg = "Error in DBSWriter.migrateDatasetBlocks\n"
            msg += "Could not write content of dataset:\n ==> %s\n" % (
                datasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
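# Usage sketch (hypothetical URL, dataset path and block names): migrate
# two named fileblocks from a source DBS into the instance this writer
# wraps; `writer` is assumed to be a DBSWriter instance.
def _exampleMigrateDatasetBlocks(writer):
    writer.migrateDatasetBlocks(
        "http://sourcedbs.example.com/DBS/servlet/DBSServlet",
        "/Primary/Processed/TIER",
        ["/Primary/Processed/TIER#11111111-aaaa",
         "/Primary/Processed/TIER#22222222-bbbb"])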
def makeBlockList(self, onlyClosedBlocks=False, sites=None,
                  providedOnlyBlocks=None):
    """
    _makeBlockList_

    Generate the list of blocks for the workflow.

    1. Get the list of all blocks from DBS.
    2. Compare to the list of blocks in the persistency file.
    3. Obtain the intersection of the new blocks and the
       providedOnlyBlocks list.
    4. Set the OnlyBlocks parameter to the intersection obtained.

    """
    #reader = DBSReader(self.dbsUrl)
    # At this point, blocks should be in local DBS
    localDBS = getLocalDBSURL()
    reader = DBSReader(localDBS)
    dbsBlocks = reader.listFileBlocks(self.inputDataset(), onlyClosedBlocks)

    if self.persistData.blocks != []:
        remover = lambda x: x not in self.persistData.blocks
        newBlocks = filter(remover, dbsBlocks)
    else:
        newBlocks = dbsBlocks

    #  //
    # // Skipping blocks without site info
    #//
    msg = "Filtering blocks according to Site information..."
    logging.info(msg)
    blocksAtSites = []
    for block in newBlocks:
        locations = reader.listFileBlockLocation(block)
        if not locations:
            msg = "\nSkipping block: "
            msg += "No site info available for block %s " % block
            logging.info(msg)
        elif sites is not None:
            locationInSites = False
            for location in locations:
                if location in sites:
                    locationInSites = True
                    break
            if locationInSites:
                blocksAtSites.append(block)
            else:
                msg = "\nSkipping block: "
                msg += "Block %s has no replicas in %s" % (
                    block, ", ".join(sites))
                logging.info(msg)
        else:
            blocksAtSites.append(block)

    newBlocks = blocksAtSites

    if len(newBlocks) == 0:
        msg = "No New Blocks found for dataset\n"
        raise RuntimeError, msg

    #  //
    # // Check presence of provided Blocks in newBlocks
    #//
    blocksToProcess = []
    if providedOnlyBlocks is not None:
        providedOnlyBlocksList = providedOnlyBlocks.split(',')
        msg = "OnlyBlocks setting provided. Processing it..."
        logging.info(msg)
        msg = "OnlyBlocks list contains %s Blocks." % (
            len(providedOnlyBlocksList))
        logging.info(msg)
        blockCount = 1
        for block in providedOnlyBlocksList:
            if block.strip() in newBlocks:
                blocksToProcess.append(block.strip())
                msg = "Block %s: Adding Block %s" % (blockCount, block)
                msg += " to the Whitelist"
                logging.info(msg)
            else:
                msg = "Block %s: Skipping Block %s " % (blockCount, block)
                msg += "It's not new or it has already been"
                msg += " processed."
                logging.info(msg)
            blockCount += 1
    else:
        blocksToProcess = newBlocks
        msg = "OnlyBlocks setting not provided. Processing"
        msg += " all New Blocks for Dataset\n"
        logging.info(msg)

    if len(blocksToProcess) == 0:
        msg = "OnlyBlocks list does not match any New Blocks"
        msg += " found for Dataset\n"
        raise RuntimeError, msg

    blockList = ", ".join(blocksToProcess)
    self.workflow.parameters['OnlyBlocks'] = blockList
    self.persistData.blocks.extend(blocksToProcess)
    return
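# Usage sketch (hypothetical site and block name): `feeder` is assumed to
# be an instance of the class defining makeBlockList. Afterwards,
# feeder.workflow.parameters['OnlyBlocks'] holds a comma-separated block
# list such as "/Primary/Processed/TIER#guid-1".
def _exampleMakeBlockList(feeder):
    feeder.makeBlockList(
        onlyClosedBlocks=True,
        sites=["T2_Example_A"],
        providedOnlyBlocks="/Primary/Processed/TIER#guid-1")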
def makeFileList(self, onlyClosedBlocks=False, sites=None,
                 providedOnlyBlocks=None, providedOnlyFiles=None):
    """
    _makeFileList_

    Generate the lists of blocks and files for the workflow.

    1. Get the list of all blocks from DBS.
    2. Compare to the list of blocks in the persistency file.
    3. Obtain the intersection of the new blocks and the
       providedOnlyBlocks list.
    4. Set the OnlyBlocks and OnlyFiles parameters to the
       intersection obtained.

    """
    #reader = DBSReader(self.dbsUrl)
    # At this point, blocks should be in local DBS
    localDBS = getLocalDBSURL()
    reader = DBSReader(localDBS)

    #  //
    # // Querying list of blocks from DBS
    #//
    msg = "Querying for closed blocks in Local DBS: %s ..." % localDBS
    logging.info(msg)
    dbsBlocks = reader.listFileBlocks(self.inputDataset(), onlyClosedBlocks)
    msg = "Retrieved %s closed blocks from Local DBS" % len(dbsBlocks)
    logging.info(msg)

    #  //
    # // Constructing block-file mapping structures
    #//
    filesToBlocks = {}
    blocksToFiles = {}
    dbsFiles = reader.dbs.listFiles(path=self.inputDataset())
    for dbsfile in dbsFiles:
        if dbsfile['Block']['Name'] in dbsBlocks:
            filesToBlocks[dbsfile['LogicalFileName']] = \
                dbsfile['Block']['Name']
            blocksToFiles.setdefault(dbsfile['Block']['Name'],
                                     []).append(dbsfile['LogicalFileName'])

    # OnlyFiles?
    if providedOnlyFiles is not None and \
        providedOnlyFiles.strip().lower() != 'auto':
        msg = "Using OnlyFiles list:"
        msg += " %s files." % len(providedOnlyFiles.split(','))
        logging.info(msg)
        onlyFiles = [x.strip() for x in providedOnlyFiles.split(',') if x]
    # OnlyFiles=auto
    elif providedOnlyFiles is not None:
        msg = "Automatically generating OnlyFiles list from DBS..."
        logging.info(msg)
        onlyFiles = self.createOnlyFilesFromWorkflow()
    # OnlyBlocks
    elif providedOnlyBlocks is not None:
        msg = "Using OnlyBlocks list:"
        msg += " %s blocks." % len(providedOnlyBlocks.split(','))
        logging.info(msg)
        onlyFiles = []
        for block in \
            [x.strip() for x in providedOnlyBlocks.split(',') if x]:
            onlyFiles.extend(blocksToFiles[block])
    # Processing everything in DBS
    else:
        msg = "Processing whole input dataset..."
        logging.info(msg)
        onlyFiles = []
        for block in dbsBlocks:
            onlyFiles.extend(blocksToFiles[block])

    if not onlyFiles:
        msg = "No files were found for the input dataset: " + \
            self.inputDataset()
        raise RuntimeError, msg

    #  //
    # // Filter files that were already processed
    #//
    if self.persistData.blocks:
        msg = "Filtering files that were already processed for this"
        msg += " workflow..."
        logging.info(msg)
        processedFiles = self.persistData.getFiles()
        msg = "Persistency file has %s file(s)" % len(processedFiles)
        logging.info(msg)
        filesBefore = len(onlyFiles)
        remover = lambda x: x not in processedFiles
        onlyFiles = filter(remover, onlyFiles)
        msg = "%s file(s) were removed" % (filesBefore - len(onlyFiles))
        logging.info(msg)

    if not onlyFiles:
        msg = "No new files were found for the input dataset: " + \
            self.inputDataset()
        raise RuntimeError, msg

    #  //
    # // Filter files in blocks without site info
    #//
    msg = "Filtering blocks according to Site information..."
    logging.info(msg)
    candidateBlocks = {}
    for lfn in onlyFiles:
        candidateBlocks.setdefault(filesToBlocks[lfn], []).append(lfn)
    blocksAtSites = []
    for block in candidateBlocks:
        locations = reader.listFileBlockLocation(block)
        if not locations:
            msg = "Excluding block without site info ==> %s" % block
            logging.info(msg)
        elif sites is not None:
            locationInSites = False
            for location in locations:
                if location in sites:
                    locationInSites = True
                    break
            if locationInSites:
                blocksAtSites.append(block)
            else:
                msg = "Excluding block without replicas"
                msg += " in %s ==> %s" % (", ".join(sites), block)
                logging.info(msg)
        else:
            blocksAtSites.append(block)

    if len(blocksAtSites) == 0:
        msg = "No block has site information."
        raise RuntimeError, msg

    #  //
    # // Constructing OnlyBlocks and OnlyFiles lists
    #//
    onlyBlocks = {}
    for block in blocksAtSites:
        onlyBlocks[block] = candidateBlocks[block]

    onlyFiles = []
    for block in onlyBlocks:
        onlyFiles.extend(onlyBlocks[block])

    msg = "\n ==> Files to process: %s" % len(onlyFiles)
    msg += "\n ==> Blocks to process: %s" % len(onlyBlocks)
    logging.info(msg)

    blockList = ",".join(onlyBlocks.keys())
    fileList = ",".join(onlyFiles)
    self.workflow.parameters['OnlyBlocks'] = blockList
    self.workflow.parameters['OnlyFiles'] = fileList
    self.persistData.update(onlyBlocks)
    return
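# Usage sketch (hypothetical sites): `feeder` is assumed to be an instance
# of the class defining makeFileList. With providedOnlyFiles='auto' the
# file list is generated from the workflow; afterwards the OnlyBlocks and
# OnlyFiles workflow parameters hold comma-separated strings.
def _exampleMakeFileList(feeder):
    feeder.makeFileList(
        onlyClosedBlocks=True,
        sites=["T2_Example_A", "T2_Example_B"],
        providedOnlyFiles="auto")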