def createFileBlock(apiRef, datasetPath, seName):
    """
    _createFileBlock_

    Return a block to write files into for the given dataset/SE pair.

    Reuses an already-open block when one exists; otherwise inserts a
    fresh block via DBS.  The returned block dict is augmented with
    'newFiles' and 'insertedFiles' bookkeeping lists.
    """
    candidates = listBlocks(apiRef=apiRef, datasetPath=datasetPath,
                            seName=seName)
    openOnes = [blk for blk in candidates
                if str(blk['OpenForWriting']) == "1"]

    if len(openOnes) > 1:
        msg = "Too many open blocks for dataset:\n"
        msg += "SE: %s\n" % seName
        msg += "Dataset: %s\n" % datasetPath
        msg += "Using last open block\n"
        logging.error(msg)
        blockRef = openOnes[-1]
    elif len(openOnes) == 1:
        logging.warning("Attempted to open block while block already open")
        blockRef = openOnes[0]
    else:
        # No open block: create one and re-fetch it by name
        try:
            freshName = apiRef.insertBlock(datasetPath, None,
                                           storage_element_list=[seName])
            matches = listBlocks(apiRef=apiRef, datasetPath=datasetPath,
                                 blockName=freshName)
            if len(matches) > 1:
                # We have created a duplicate of a primary key according to Anzar
                msg = "Created duplicate blocks with duplicate names. Help!"
                msg += freshName
                raise DBSInterfaceError(msg)
            blockRef = matches[0]
        except DbsException as ex:
            msg = "Error in DBSInterface.createFileBlock(%s)\n" % datasetPath
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)

    # Bookkeeping fields used by the file-insertion machinery
    blockRef['newFiles'] = []
    blockRef['insertedFiles'] = []

    return blockRef
def closeAndMigrateBlocksByName(self, blockNames):
    """
    _closeAndMigrateBlocksByName_

    Close (and, if global migration is enabled, migrate) blocks given
    only their names.  Used by the timeout handling in DBSUploadPoller.

    Accepts either a single block name or a list of names.

    Raises DBSInterfaceError if a name does not resolve to exactly one
    block in local DBS.
    """
    # Normalize a single name to a one-element list (idiomatic
    # isinstance instead of type() comparison)
    if not isinstance(blockNames, list):
        blockNames = [blockNames]

    blocksToClose = []
    for name in blockNames:
        blockList = listBlocks(apiRef=self.dbs, blockName=name)
        if len(blockList) != 1:
            msg = "Error: We can't load blocks with this name\n"
            msg += str(name)
            msg += "\nRetrieved %i blocks" % (len(blockList))
            logging.error(msg)
            raise DBSInterfaceError(msg)
        block = blockList[0]
        # Mark as pending so insertFilesAndCloseBlocks closes it
        block['open'] = 'Pending'
        b2 = self.insertFilesAndCloseBlocks(block=block, close=True)
        blocksToClose.append(b2)

    if self.doGlobalMigration:
        self.migrateClosedBlocks(blocks=blocksToClose)
    return blocksToClose
def insertFiles(apiRef, datasetPath, files, block, maxFiles=10):
    """
    _insertFiles_

    Insert a list of file objects into a block, batching the DBS calls
    in chunks of at most maxFiles files each.

    Raises DBSInterfaceError on any DBS failure.
    """
    if not files:
        return

    # Walk the file list in maxFiles-sized chunks; this replaces the
    # manual while-loop that rebound `files` repeatedly and uses the
    # portable `except ... as` syntax used elsewhere in this module.
    for start in range(0, len(files), maxFiles):
        chunk = files[start:start + maxFiles]
        try:
            apiRef.insertFiles(datasetPath, chunk, block)
        except DbsException as ex:
            msg = "Error in DBSInterface.insertFiles(%s)\n" % datasetPath
            msg += "%s\n" % formatEx(ex)
            msg += str(traceback.format_exc())
            raise DBSInterfaceError(msg)
def listPrimaryDatasets(apiRef, match="*"):
    """
    _listPrimaryDatasets_

    Return the list of primary datasets matching the given pattern.

    Raises DBSInterfaceError on any DBS failure.
    """
    try:
        result = apiRef.listPrimaryDatasets(match)
    except DbsException as ex:
        msg = "Error in DBSInterface.listPrimaryDataset(%s)\n" % match
        msg += "%s\n" % formatEx(ex)
        raise DBSInterfaceError(msg)
    # Fix: result was computed but never returned (cf. listAlgorithms,
    # which follows the same pattern and does return)
    return result
def _createProcessedDataset(self, algorithm, apiRef, primary, processedName, dataTier, group = "NoGroup", status = "VALID", globalTag = '', parent = None): """ _createProcessedDataset_ Create an procds object in fake DBS2 """ if algorithm not in self.algoList: raise DBSInterfaceError("No algo object inserted before associated processed dataset") if primary not in self.primaryDatasetList: raise DBSInterfaceError("No primds object inserted before associated processed dataset") procDsObject = {'name' : processedName, 'tier' : dataTier, 'group' : group, 'status' : status, 'tag' : globalTag, 'parent' : parent} if apiRef: self.processedDatasetList.append(procDsObject) return procDsObject
def listBlocks(apiRef, datasetPath=None, blockName="*", seName="*"):
    """
    _listBlocks_

    Return the blocks matching the dataset path, block name and
    storage element name patterns.

    Raises DBSInterfaceError on any DBS failure.
    """
    try:
        blocks = apiRef.listBlocks(datasetPath,
                                   block_name=blockName,
                                   storage_element_name=seName)
    except DbsException as ex:
        msg = "Error in DBSInterface.listBlocks(%s)\n" % datasetPath
        msg += "%s\n" % formatEx(ex)
        raise DBSInterfaceError(msg)
    # Fix: the block list was never returned, but callers
    # (createFileBlock, closeAndMigrateBlocksByName) rely on it
    return blocks
def listProcessedDatasets(apiRef, primary, dataTier="*"):
    """
    _listProcessedDatasets_

    Return the list of processed datasets for the given primary dataset
    and optional data tier pattern.

    Raises DBSInterfaceError on any DBS failure.
    """
    try:
        result = apiRef.listProcessedDatasets(primary, dataTier)
    except DbsException as ex:
        msg = "Error in DBSInterface.listProcessedDatasets(%s)\n" % primary
        msg += "%s\n" % formatEx(ex)
        raise DBSInterfaceError(msg)
    # Fix: result was computed but never returned
    return result
def closeBlock(apiRef, block):
    """
    _closeBlock_

    Close a block in DBS.

    Raises DBSInterfaceError on any DBS failure.
    """
    logging.info("In closeBlock()")
    try:
        apiRef.closeBlock(block)
        logging.info("Back from closeBlock()")
    # Portable exception syntax, consistent with the rest of the module
    except DbsException as ex:
        msg = "Error in DBSInterface.closeBlock(%s)\n" % block
        msg += "%s\n" % formatEx(ex)
        raise DBSInterfaceError(msg)
def listAlgorithms(apiRef, patternVer="*", patternFam="*",
                   patternExe="*", patternPS="*"):
    """
    _listAlgorithms_

    Return the DBS algorithms matching the version, family, executable
    and parameter-set patterns.

    Raises DBSInterfaceError on any DBS failure.
    """
    try:
        result = apiRef.listAlgorithms(patternVer=patternVer,
                                       patternFam=patternFam,
                                       patternExe=patternExe,
                                       patternPS=patternPS)
    except DbsException as ex:
        # Fix: error message previously named listPrimaryDataset()
        # (copy-paste error) instead of this function
        msg = "Error in DBSInterface.listAlgorithms()\n"
        msg += "%s\n" % formatEx(ex)
        raise DBSInterfaceError(msg)
    return result
def createProcessedDataset(algorithm, apiRef, primary, processedName, dataTier,
                           group="NoGroup", status="VALID", globalTag='',
                           parent=None):
    """
    _createProcessedDataset_

    Create a processed dataset and, when apiRef is supplied, insert it
    into DBS.

    algorithm may be a single algorithm object or a list of them;
    dataTier may be a '-'-separated list of tiers.

    Raises DBSInterfaceError on any DBS failure.
    """
    # Idiomatic None/identity and isinstance checks instead of
    # `!= None` and `type(...) == list`
    parents = [parent] if parent is not None else []

    tierList = dataTier.split("-")

    if not isinstance(algorithm, list):
        algorithm = [algorithm]

    processedDataset = DbsProcessedDataset(PrimaryDataset=primary,
                                           AlgoList=algorithm,
                                           Name=processedName,
                                           TierList=tierList,
                                           ParentList=parents,
                                           PhysicsGroup=group,
                                           Status=status,
                                           GlobalTag=globalTag)

    if apiRef is not None:
        try:
            apiRef.insertProcessedDataset(processedDataset)
        except DbsException as ex:
            msg = "Error in DBSInterface.createProcessedDataset(%s)\n" % processedName
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)

    logging.info("PrimaryDataset: %s ProcessedDataset: %s DataTierList: %s requested by PhysicsGroup: %s " \
                 % (primary['Name'], processedName, dataTier, group))
    return processedDataset
def __init__(self, config):
    """
    ___init___

    Configure the interface from config.DBSInterface: local (and
    optionally global) DBS API connections plus block-management
    thresholds.

    Raises DBSInterfaceError if the DbsApi objects cannot be created.
    """
    # Config should have DBSInterface element
    self.config = config.DBSInterface

    args = {'url': self.config.DBSUrl,
            'level': 'ERROR',
            "user": '******',
            'version': self.config.DBSVersion}

    self.version = self.config.DBSVersion
    self.globalDBSUrl = None
    # Bounded history of recently committed runs
    self.committedRuns = collections.deque(maxlen=1000)
    self.maxBlockFiles = self.config.DBSBlockMaxFiles
    self.maxBlockTime = self.config.DBSBlockMaxTime
    self.maxBlockSize = self.config.DBSBlockMaxSize
    self.maxFilesToCommit = self.config.MaxFilesToCommit
    self.doGlobalMigration = getattr(self.config, 'doGlobalMigration', True)

    # Identity check instead of `!= None`
    if getattr(self.config, 'globalDBSUrl', None) is not None:
        globalArgs = {'url': self.config.globalDBSUrl,
                      'level': 'ERROR',
                      "user": '******',
                      'version': self.config.globalDBSVersion}
        self.globalDBSUrl = self.config.globalDBSUrl
    else:
        # No global DBS configured, so there is nothing to migrate to
        self.doGlobalMigration = False

    try:
        self.dbs = DbsApi(args)
        if self.globalDBSUrl:
            self.globalDBS = DbsApi(globalArgs)
    # Portable exception syntax, consistent with the rest of the module
    except DbsException as ex:
        msg = "Error in DBSWriterError with DbsApi\n"
        msg += "%s\n" % formatEx(ex)
        logging.error(msg)
        raise DBSInterfaceError(msg)
def createUncheckedBlock(apiRef, name, datasetPath, seName):
    """
    _createUncheckedBlock_

    Blindly create a block with the established name; no checks for
    open or existing blocks are performed.  Returns the block name
    reported by DBS.

    Raises DBSInterfaceError on any DBS failure.
    """
    try:
        newBlockName = apiRef.insertBlock(dataset=datasetPath,
                                          block=name,
                                          storage_element_list=[seName])
    except DbsException as ex:
        msg = "Error in DBSInterface.createUncheckedBlock(%s)\n" % name
        msg += formatEx(ex)
        logging.error(msg)
        raise DBSInterfaceError(msg)
    # Fix: return the inserted block's name so callers can reference it
    # (it was computed but discarded before)
    return newBlockName
def createPrimaryDataset(primaryName, primaryDatasetType='mc', apiRef=None):
    """
    _createPrimaryDataset_

    Build a DbsPrimaryDataset and, when apiRef is supplied, insert it
    into DBS.  Returns the primary dataset object.

    Raises DBSInterfaceError on any DBS failure.
    """
    logging.debug("Inserting PrimaryDataset %s with Type %s" \
                  % (primaryName, primaryDatasetType))
    primary = DbsPrimaryDataset(Name=primaryName, Type=primaryDatasetType)

    if apiRef:
        try:
            apiRef.insertPrimaryDataset(primary)
        # Portable exception syntax, consistent with the rest of the module
        except DbsException as ex:
            msg = "Error in DBSInterface.createPrimaryDataset(%s)\n" % primaryName
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)
    # Fix: return the constructed object (it was built but discarded),
    # matching the createProcessedDataset/createAlgorithm pattern
    return primary
def createAlgorithm(apiRef, appName, appVer, appFam,
                    PSetHash=None, PSetContent=None):
    """
    _createAlgorithm_

    Create a new DBS Algorithm, explicitly passing in the arguments.
    We don't use configs.  When apiRef is supplied the algorithm is
    inserted into DBS.

    Raises DBSInterfaceError on any DBS failure.
    """
    # Take care of PSetHash
    if not PSetHash:
        PSetHash = "NO_PSET_HASH2"
    elif ";" in PSetHash:
        # Fix: the original tested the truthiness of PSetHash.find(";"),
        # which is True when ";" is absent (find returns -1) and False
        # when ";" is at index 0 -- the opposite of the intent.
        # no need for fake hash in new schema
        PSetHash = PSetHash.split(";")[0]
        PSetHash = PSetHash.replace("hash=", "")

    # Create PSetHash. dbsApi tries to base64encode the value of PSetContent
    # which blows up if it's None
    if not PSetContent:
        PSetContent = ""

    psetInstance = DbsQueryableParameterSet(Hash=PSetHash,
                                            Content=PSetContent)
    algorithmInstance = DbsAlgorithm(ExecutableName=appName,
                                     ApplicationVersion=appVer,
                                     ApplicationFamily=appFam,
                                     ParameterSetID=psetInstance)
    if apiRef:
        try:
            apiRef.insertAlgorithm(algorithmInstance)
        except DbsException as ex:
            msg = "Error in DBSInterface.createAlgorithm(%s)\n" % appVer
            msg += formatEx(ex)
            logging.error(msg)
            raise DBSInterfaceError(msg)
    return algorithmInstance
def migrateClosedBlocks(self, blocks):
    """
    _migrateClosedBlocks_

    Migrate closed blocks, one at a time, from local to global DBS.
    Blocks that are still open (OpenForWriting != '0') are silently
    skipped, because callers pass all blocks here.

    Returns the block list (migrated blocks get open='InGlobalDBS').

    Raises DBSInterfaceError on any DBS migration failure.
    """
    if not self.doGlobalMigration:
        logging.debug("Skipping migration due to doGlobalMigration tag.")
        return blocks

    # Accept a single block as well as a list (isinstance instead of
    # type() comparison)
    if not isinstance(blocks, list):
        blocks = [blocks]

    for block in blocks:
        if block.get('OpenForWriting', 1) != '0':
            # logging.error("Attempt to migrate open block!")
            # Block is not done
            # Ignore this, because we send all blocks here
            continue
        try:
            # Migrate each block
            logging.info("About to migrate block %s" % (block['Name']))
            self.dbs.dbsMigrateBlock(srcURL=self.config.DBSUrl,
                                     dstURL=self.globalDBSUrl,
                                     block_name=block['Name'],
                                     srcVersion=self.version,
                                     dstVersion=self.config.globalDBSVersion)
            block['open'] = 'InGlobalDBS'
        except DbsException as ex:
            msg = "Error in DBSInterface.migrateClosedBlocks()\n"
            msg += "%s\n" % formatEx(ex)
            msg += str(traceback.format_exc())
            raise DBSInterfaceError(msg)

    return blocks