def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
    """
    Compare a PileupFetcher-produced pileup configuration with DBS.

    pileupDict is a Python dictionary containing particular pileup
    configuration information. Query DBS on given dataset contained
    now in both input defaultArguments as well as in the pileupDict
    and compare values.

    Arguments:
      pileupDict -- configuration under test, structured as
          {"pileupTypeA": {"BlockA": {"FileList": [],
                                      "StorageElementNames": []},
                           "BlockB": {"FileList": [],
                                      "StorageElementNames": []}, ...}}
      defaultArguments -- dictionary whose "PileupConfig" entry maps each
          pileup type to the datasets requested for it
      dbsUrl -- URL passed to DbsApi

    Every mismatch is reported through the self.assert* methods.
    """
    # this should have been set in CMSSWStepHelper along with
    # the pileup configuration
    args = {"url": dbsUrl, "version": "DBS_2_0_9", "mode": "GET"}
    dbsApi = DbsApi(args)

    inputArgs = defaultArguments["PileupConfig"]

    self.assertEqual(len(inputArgs), len(pileupDict),
                     "Number of pileup types different.")
    for pileupType in inputArgs:
        m = ("pileup type '%s' not in PileupFetcher-produced pileup "
             "configuration: '%s'" % (pileupType, pileupDict))
        # assertTrue replaces the deprecated failUnless alias
        self.assertTrue(pileupType in pileupDict, m)

    # now query DBS to compare actual results on file lists for each
    # pileup type and dataset and location (storage element names)
    for pileupType, datasets in inputArgs.items():
        # this is from the pileup configuration produced by PileupFetcher
        blockDict = pileupDict[pileupType]
        m = "Number of datasets for pileup type '%s' is not equal." % pileupType
        self.assertEqual(len(blockDict), len(datasets), m)

        for dataset in datasets:
            for dbsFileBlock in dbsApi.listBlocks(dataset=dataset):
                blockName = dbsFileBlock["Name"]
                # The expected values are stored per block, so the DBS
                # values are collected per block as well; accumulating
                # them over all blocks of a dataset would make every
                # multi-block dataset fail.
                storageElemNames = [storElem["Name"] for storElem
                                    in dbsFileBlock["StorageElementList"]]
                fileList = [dbsFile["LogicalFileName"] for dbsFile
                            in dbsApi.listFiles(blockName=blockName)]

                m = ("StorageElementNames don't agree for pileup type '%s', "
                     "dataset '%s' in configuration: '%s'"
                     % (pileupType, dataset, pileupDict))
                self.assertEqual(blockDict[blockName]["StorageElementNames"],
                                 storageElemNames, m)
                m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                     " in configuration: '%s'"
                     % (pileupType, dataset, pileupDict))
                # the message used to be built but never passed on
                self.assertEqual(blockDict[blockName]["FileList"], fileList, m)
args = {} args['url'] = dstURL args['mode']='POST' api = DbsApi(args) try: print "\n\nListing Datasets " paths = api.listDatasetPaths() ranIndex = random.randint(1,len(paths)) #myDataset = paths[ranIndex] myDataset = "/Wjets-sherpa/Summer08_IDEAL_V12_v1/GEN-SIM-RAW" print "Selected dataset is %s", myDataset print "\nListing Blocks " blocks = api.listBlocks(myDataset) #print blocks print "\n Listing Files " files = api.listFiles(path = myDataset, retriveList=["all"]) #print files #for path in paths: # print path except DbsApiException, ex: print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ) if ex.getErrorCode() not in (None, ""): print "DBS Exception Error Code: ", ex.getErrorCode()
#!/usr/bin/env python # # API Unit tests for the DBS JavaServer. import sys from DBSAPI.dbsApi import DbsApi from DBSAPI.dbsException import * from DBSAPI.dbsApiException import * from DBSAPI.dbsOptions import DbsOptionParser try: optManager = DbsOptionParser() (opts,args) = optManager.getOpt() api = DbsApi(opts.__dict__) for block in api.listBlocks("/test_primary_anzar_001/SIM/TestProcessedDS001"): #for block in api.listBlocks("/TestPrimary1167862926.47/SIM1167862926.47/TestProcessed1167862926.47", "/*hahah#12345"): #for block in api.listBlocks("/TestPrimary1167862926.47/SIM1167862926.47/TestProcessed1167862926.47", "/this/*"): #for block in api.listBlocks("/TestPrimary1167862926.47/SIM1167862926.47/TestProcessed1167862926.47", "/this/ff*"): #for block in api.listBlocks("/TestPrimary1167862926.47/SIM1167862926.47/TestProcessed1167862926.47", "/this/hahah#12345"): #for block in api.listBlocks("/TestPrimary1167862926.47/SIM1167862926.47/TestProcessed1167862926.47", "/this/hahah#12345"): print " %s" % block except DbsApiException, ex: print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ) if ex.getErrorCode() not in (None, ""): print "DBS Exception Error Code: ", ex.getErrorCode()
if blockFile not in blockFiles: if not badFiles.has_key(blockName): badFiles[blockName] = [] badFiles[blockName].append(blockFile) #sys.exit(0) psetInstance = DbsQueryableParameterSet(Hash = "GIBBERISH") for newBlockName in badFiles.keys(): seName = blockLocation[newBlockName] (datasetPath, junk) = newBlockName.split("#", 1) dbsApi.insertBlock(datasetPath, newBlockName, storage_element_list = [seName]) blockRef = dbsApi.listBlocks(dataset = datasetPath, block_name = newBlockName)[0] print blockRef newFiles = [] for newFileLFN in badFiles[newBlockName]: localFile = DBSBufferFile(lfn = newFileLFN) localFile.load(parentage = 1) (primaryDS, procDS, tier) = datasetPath[1:].split("/", 3) primary = DbsPrimaryDataset(Name = primaryDS, Type = "mc") algo = DbsAlgorithm(ExecutableName = localFile["appName"], ApplicationVersion = localFile["appVer"], ApplicationFamily = localFile["appFam"], ParameterSetID = psetInstance) processed = DbsProcessedDataset(PrimaryDataset = primary, AlgoList = [algo],
apiSrc = DbsApi(opts.__dict__) else : apiSrc = DbsApi(argsSrc) if(argsTar['url'] == "LOCALDBS" ): apiTar = DbsApi(opts.__dict__) else: apiTar = DbsApi(argsTar) path = sys.argv[3] print path #name = argsSrc['url'].replace('/','_') + "_" + argsTar['url'].replace('/', '_') + path.replace('/', '_') name = argsSrc['url'].replace('/','_').replace(':', '_') + "_" + argsTar['url'].replace('/', '_').replace(':', '_') blocks = apiSrc.listBlocks(path) if ((op == "both") | (op == "get")) : #Fetch the dataset contents and save them in a file for i in blocks: blockName = i['Name'] fileName = blockName.replace('/', '_').replace('#', '_') + ".xml" print "Fetching information for Block %s " % blockName xmlinput = apiSrc.listDatasetContents(path, blockName) f = open(name + fileName, "w"); f.write(xmlinput) f.close() print "Dataset information fetched from " + argsSrc['url'] + " in XML format is saved in " + name + fileName if ((op == "both") | (op == "set")) : #Insert the saved contents into another DBS instance for i in blocks:
print " # files already published: ",nPublishedFiles print " # new files: ",nFiles print "#################################################################################################" #-------------------- # loop over files count = 0 while count < len(files): # ------------------ # insert block print "inserting block..." blockName = dbsApi.insertBlock(datasetPath, None , storage_element_list = [seName]) block = dbsApi.listBlocks(datasetPath, block_name = blockName, storage_element_name = seName)[0] print "...block inserted (name: ",blockName,")" #-------------------- # loop over files start = count stop = min(count+blockSize,len(files)) _files = files[start:stop] dbsFiles = [] print "preparing files for 1 block..." for file in _files:
badFiles[blockName] = [] badFiles[blockName].append(blockFile) #sys.exit(0) psetInstance = DbsQueryableParameterSet(Hash="GIBBERISH") for newBlockName in badFiles.keys(): seName = blockLocation[newBlockName] (datasetPath, junk) = newBlockName.split("#", 1) dbsApi.insertBlock(datasetPath, newBlockName, storage_element_list=[seName]) blockRef = dbsApi.listBlocks(dataset=datasetPath, block_name=newBlockName)[0] print blockRef newFiles = [] for newFileLFN in badFiles[newBlockName]: localFile = DBSBufferFile(lfn=newFileLFN) localFile.load(parentage=1) (primaryDS, procDS, tier) = datasetPath[1:].split("/", 3) primary = DbsPrimaryDataset(Name=primaryDS, Type="mc") algo = DbsAlgorithm(ExecutableName=localFile["appName"], ApplicationVersion=localFile["appVer"], ApplicationFamily=localFile["appFam"], ParameterSetID=psetInstance) processed = DbsProcessedDataset(PrimaryDataset=primary, AlgoList=[algo],
if myopt == "--dataset" : datasetpath = myarg if datasetpath == None: print "Please specify --dataset" sys.exit(2) se = 'srm.cern.ch' try: outputname = datasetpath.replace('/','_')[1:]+'.xml' outputfile = file(outputname.replace('PreCSA08','CSA08'),'w') output = "" api = DbsApi(dbsargs) try: blocks = api.listBlocks(dataset=datasetpath,block_name="*",storage_element_name=se); except: blocks = [] for block in blocks: output += api.listDatasetContents(datasetpath,block['Name']) newoutput = output.replace('PreCSA08','CSA08') outputfile.write(newoutput) outputfile.close() except DbsApiException, ex: print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ) if ex.getErrorCode() not in (None, ""): print "DBS Exception Error Code: ", ex.getErrorCode()
def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
    """
    Compare a PileupFetcher-produced pileup configuration with DBS.

    pileupDict is a Python dictionary containing particular pileup
    configuration information. Query DBS on given dataset contained
    now in both input defaultArguments as well as in the pileupDict
    and compare values.

    Arguments:
      pileupDict -- configuration under test, structured as
          {"pileupTypeA": {"BlockA": {"FileList": [],
                                      "StorageElementNames": []},
                           "BlockB": {"FileList": [],
                                      "StorageElementNames": []}, ...}}
      defaultArguments -- dictionary whose "PileupConfig" entry maps each
          pileup type to the datasets requested for it
      dbsUrl -- URL passed to DbsApi

    Every mismatch is reported through the self.assert* methods.
    """
    # this should have been set in CMSSWStepHelper along with
    # the pileup configuration
    args = {"url": dbsUrl, "version": "DBS_2_0_9", "mode": "GET"}
    dbsApi = DbsApi(args)

    inputArgs = defaultArguments["PileupConfig"]

    self.assertEqual(len(inputArgs), len(pileupDict),
                     "Number of pileup types different.")
    for pileupType in inputArgs:
        m = ("pileup type '%s' not in PileupFetcher-produced pileup "
             "configuration: '%s'" % (pileupType, pileupDict))
        # assertTrue replaces the deprecated failUnless alias
        self.assertTrue(pileupType in pileupDict, m)

    # now query DBS to compare actual results on file lists for each
    # pileup type and dataset and location (storage element names)
    for pileupType, datasets in inputArgs.items():
        # this is from the pileup configuration produced by PileupFetcher
        blockDict = pileupDict[pileupType]
        m = "Number of datasets for pileup type '%s' is not equal." % pileupType
        self.assertEqual(len(blockDict), len(datasets), m)

        for dataset in datasets:
            for dbsFileBlock in dbsApi.listBlocks(dataset=dataset):
                blockName = dbsFileBlock["Name"]
                # The expected values are stored per block, so the DBS
                # values are collected per block as well; accumulating
                # them over all blocks of a dataset would make every
                # multi-block dataset fail.
                storageElemNames = [storElem["Name"] for storElem
                                    in dbsFileBlock["StorageElementList"]]
                fileList = [dbsFile["LogicalFileName"] for dbsFile
                            in dbsApi.listFiles(blockName=blockName)]

                m = ("StorageElementNames don't agree for pileup type '%s', "
                     "dataset '%s' in configuration: '%s'"
                     % (pileupType, dataset, pileupDict))
                self.assertEqual(blockDict[blockName]["StorageElementNames"],
                                 storageElemNames, m)
                m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                     " in configuration: '%s'"
                     % (pileupType, dataset, pileupDict))
                # the message used to be built but never passed on
                self.assertEqual(blockDict[blockName]["FileList"], fileList, m)
class BlcokConsitencyTest(object):
    """
    Report how the blocks recorded in T0AST compare with the blocks known
    to the local and the global DBS instance.

    All results are printed; nothing is written to T0AST or DBS by the
    methods of this class.
    """

    def __init__(self):
        """
        Connect to T0AST and create one DbsApi per DBS instance, using the
        "Tier0DB", "LocalDBS" and "GlobalDBSDLS" sections of the ProdAgent
        configuration.
        """
        paConfig = loadProdAgentConfiguration()
        t0astDBConfig = paConfig.getConfig("Tier0DB")
        self.t0astDBConn = Tier0DB.Tier0DB(t0astDBConfig, manageGlobal = True)
        self.t0astDBConn.connect()

        localDBSConfig = paConfig.getConfig("LocalDBS")
        globalDBSConfig = paConfig.getConfig("GlobalDBSDLS")
        # phedexConfig = paConfig.getConfig("PhEDExConfig")

        self.localDBSUrl = localDBSConfig["DBSURL"]
        self.globalDBSUrl = globalDBSConfig["DBSURL"]
        # self.phedexDSUrl = phedexConfig["DataServiceURL"]

        self.localDbsApi = DbsApi({'url':localDBSConfig["DBSURL"]})
        self.globalDbsApi = DbsApi({'url':globalDBSConfig["DBSURL"]})

    def listMissingBlocksFromDBS(self, dbsApi, blockStatus, migrateStatus):
        """
        Return the T0AST blocks in blockStatus for which dbsApi.listBlocks
        returns an empty list.

        NOTE(review): migrateStatus is accepted but not used in the T0AST
        query, unlike in listAllBlocksFromDBS -- confirm this is intended.
        """
        blockList = ListBlock.listBlocksByStatus(self.t0astDBConn, blockStatus)
        return [block for block in blockList
                if dbsApi.listBlocks(block_name=block["BLOCK_NAME"]) == []]

    def listAllBlocksFromDBS(self, dbsApi, blockStatus, migrateStatus):
        """
        Return the T0AST blocks in blockStatus / migrateStatus for which
        dbsApi.listBlocks returns a non-empty result.
        """
        blockList = ListBlock.listBlocksByStatus(self.t0astDBConn, blockStatus,
                                                 migrateStatus)
        return [block for block in blockList
                if dbsApi.listBlocks(block_name=block["BLOCK_NAME"]) != []]

    def printMissingBlockName(self, location, status, migrateStatus = None, verbose = 1):
        """
        Print the T0AST blocks in the given status that are missing from
        one DBS instance.

        location -- "local" or "global": the DBS instance to check
        status, migrateStatus -- forwarded to the block selection
        verbose -- converted with int(); above 2 the blocks that do exist
                   in DBS are printed as well

        Raises ValueError for any other location (this used to surface
        later as an UnboundLocalError).
        """
        if location == "local":
            dbsApi = self.localDbsApi
            dbsUrl = self.localDBSUrl
        elif location == "global":
            dbsApi = self.globalDbsApi
            dbsUrl = self.globalDBSUrl
        else:
            raise ValueError("location must be 'local' or 'global', got %r"
                             % (location,))

        verboseLevel = int(verbose)
        print("For the all %s, %s block -" % (status, migrateStatus))
        if verboseLevel > 2:
            allBlockList = self.listAllBlocksFromDBS(dbsApi, status, migrateStatus)
            print("All t0ast blocks in DBS : %s number: %s" % (dbsUrl, len(allBlockList)))
            self.printBlocksAndFiles(location, allBlockList, "all", verboseLevel)

        blockList = self.listMissingBlocksFromDBS(dbsApi, status, migrateStatus)
        if blockList == []:
            print("There is no missing blocks in DBS : %s" % dbsUrl)
        else:
            print("List of missing blocks from DBS : %s number: %s" % (dbsUrl, len(blockList)))
            self.printBlocksAndFiles(location, blockList, "missing", verboseLevel)
        # NOTE(review): printed in both cases; confirm it was not meant for
        # the "missing blocks found" branch only.
        print("the number of missing block: %s" % len(blockList))

    def _printPresenceInDBS(self, dbsApi, label, blockName, lfn):
        """
        Print whether the block blockName and the file lfn are known to
        dbsApi; label ("Local" or "Global") names the instance in the output.
        """
        if dbsApi.listBlocks(block_name=blockName) == []:
            print("It doesn't exist in %s dbs: Something wrong" % label)
        else:
            print("Block: %s exist in %s DBS" % (blockName, label))
        # NOTE(review): the file lookup is done whether or not the block
        # was found -- confirm against the original layout.
        try:
            for dbsFile in dbsApi.listFiles(patternLFN=lfn):
                print("File: %s exist in %s DBS" % (dbsFile["LogicalFileName"], label))
        except Exception:
            # a failing lookup is reported as "not there", as before, but
            # KeyboardInterrupt / SystemExit are no longer swallowed
            print("File doesn't exist in %s DBS" % label)

    def printBlocksAndFiles(self, location, blockList, type = "missing", verbose = 1):
        """
        print blocks and files:
        verbose = 1 only blocks, verbose > 1 blocks and files

        location -- "global" additionally lists the files of each block as
                    seen by the local DBS
        blockList -- T0AST block records with "BLOCK_ID" and "BLOCK_NAME"
        type -- word used in the printed headings ("missing", "all")
        """
        for block in blockList:
            print("\n")
            print("#################################################################")
            print("Block ID: %s Name: %s" % (block["BLOCK_ID"], block["BLOCK_NAME"]))

            if verbose < 2:
                continue

            if location == "global":
                print("\n")
                print("List all files from %s block from Global DBS: Get info from local DBS " % type)
                for dbsFile in self.localDbsApi.listFiles(blockName=block["BLOCK_NAME"]):
                    print("parent list: %s" % dbsFile["ParentList"])
                    print("file LFN: %s" % dbsFile["LogicalFileName"])

            print("\n")
            fileIDs = ListFiles.listFileIDsByBlockID(self.t0astDBConn, block["BLOCK_ID"])
            print("List all files from %s block from Global DBS: Get info from T0AST " % type)
            print("=====================================================")
            for fileID in fileIDs:
                wmbsFile = WMBSFile(id = fileID)
                wmbsFile.load()
                print("--------------------------------------")
                print("Info from: T0AST")
                print("file LFN: %s" % wmbsFile["lfn"])
                print("")

                # Kept under its own name for the whole iteration: the DBS
                # loops below used to reuse the name 'file', so the parent
                # lookup at the end was done on the wrong object.
                t0astFile = T0ASTFile(wmbsFile)
                t0astFile.datasetPathID = \
                    ListDatasets.listDatasetIDForWMBSFile(self.t0astDBConn, wmbsFile["id"])
                datasetNames = \
                    ListDatasets.listDatasetNamesForWMBSFile(self.t0astDBConn, wmbsFile["id"])
                t0astFile["PRIMARY_DATASET"] = datasetNames["PRIMARY"]
                t0astFile["PROCESSED_DATASET"] = datasetNames["PROCESSED"]
                t0astFile["DATA_TIER"] = datasetNames["TIER"]

                if t0astFile["DATA_TIER"] == "RECO":
                    t0ParentFileList = t0astFile.getParentList(type="file")
                    for parentWmbsFile in t0ParentFileList:
                        t0File = T0ASTFile(parentWmbsFile)
                        t0File["BLOCK_ID"] = ListBlock.getBlockIDByFileID(self.t0astDBConn,
                                                                          parentWmbsFile["id"])
                        print("Block ID: %s : Parent File: %s" % (t0File["BLOCK_ID"], t0File["LFN"]))
                        if t0File["BLOCK_ID"] is None:
                            continue

                        blockInfo = ListBlock.getBlockInfoByID(self.t0astDBConn, t0File["BLOCK_ID"])
                        print("Block Name: %s \nStatus: %s" % (blockInfo["BLOCK_NAME"], blockInfo["STATUS"]))

                        if blockInfo["STATUS"] == "InFlight" or blockInfo["MIGRATE_STATUS"] == "Migrated":
                            self._printPresenceInDBS(self.localDbsApi, "Local",
                                                     blockInfo["BLOCK_NAME"], t0File["LFN"])
                        if blockInfo["MIGRATE_STATUS"] == "Migrated":
                            self._printPresenceInDBS(self.globalDbsApi, "Global",
                                                     blockInfo["BLOCK_NAME"], t0File["LFN"])

                print("\n")
                print("Info from Local DBS: List all parent files from %s block:" % type)
                try:
                    for pfile in self.localDbsApi.listFileParents(t0astFile["LFN"]):
                        # DBS block objects are keyed by "Name" everywhere
                        # else they are read, so "NAME" is replaced here
                        print("Parent Block: %s" % pfile["Block"]["Name"])
                        print("Parent File: %s" % pfile["LogicalFileName"])
                        print("Info from Global DBS: parent block for %s block:" % type)
                        # separate name: 'blockList' is the list being iterated
                        # NOTE(review): the lookup uses the current block's
                        # name while the message names the parent block.
                        globalBlocks = self.globalDbsApi.listBlocks(block_name=block["BLOCK_NAME"])
                        if globalBlocks == []:
                            print("Global DBS doen't have block %s "% pfile["Block"]["Name"])
                        else:
                            for dbsBlock in globalBlocks:
                                print("Global DBS Parent block %s exsist" % dbsBlock["Name"])
                except Exception:
                    print("No parents file found in Local DBS ")
                print("=====================================================")
else : apiSrc = DbsApi(argsSrc) if(argsTar['url'] == "LOCALDBS" ): apiTar = DbsApi(opts.__dict__) else: apiTar = DbsApi(argsTar) path = sys.argv[3] print path #name = argsSrc['url'].replace('/','_') + "_" + argsTar['url'].replace('/', '_') + path.replace('/', '_') #name = argsSrc['url'].replace('/','_').replace(':', '_') + "_" + argsTar['url'].replace('/', '_').replace(':', '_') name = "Transfer_" blocks = apiSrc.listBlocks(path) if ((op == "both") | (op == "get")) : #Fetch the dataset contents and save them in a file for i in blocks: blockName = i['Name'] fileName = blockName.replace('/', '_').replace('#', '_') + ".xml" if os.path.exists(name + fileName): print "WARNNING The XML file " + name + fileName + " exists already and will be used. The information will not be fetched again" else: print "Fetching information for Block %s " % blockName xmlinput = apiSrc.listDatasetContents(path, blockName) f = open(name + fileName, "w"); f.write(xmlinput) f.close() print "Dataset information fetched from " + argsSrc['url'] + " in XML format is saved in " + name + fileName
except getopt.GetoptError: print "Please specify --dataset" sys.exit(2) for myopt, myarg in myopts : if myopt == "--dataset" : datasetpath = myarg if datasetpath == None: print "Please specify --dataset" sys.exit(2) cern_se = 'srm.cern.ch' try: api = DbsApi(dbsargs) try: blocks = api.listBlocks(dataset=datasetpath,block_name="*",storage_element_name="*"); except: blocks = [] for block in blocks: for se in block['StorageElementList'] : if se['Name'] != cern_se : api.deleteReplicaFromBlock(block['Name'],se['Name']) except DbsApiException, ex: print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() ) if ex.getErrorCode() not in (None, ""): print "DBS Exception Error Code: ", ex.getErrorCode()
class DBSWriter: """ _DBSWriter_ General API for writing data to DBS """ def __init__(self, url, **contact): args = {"url": url, "level": 'ERROR'} args.update(contact) try: self.dbs = DbsApi(args) self.args = args self.version = args.get('version', None) self.globalDBSUrl = args.get('globalDBSUrl', None) self.globalVersion = args.get('globalVersion', None) if self.globalDBSUrl: globalArgs = {'url': url, 'level': 'ERROR'} globalArgs.update(contact) self.globalDBS = DbsApi(globalArgs) except DbsException as ex: msg = "Error in DBSWriterError with DbsApi\n" msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) self.reader = DBSReader(**args) def createDatasets(self, workflowSpec): """ _createDatasets_ Create All the output datasets found in the workflow spec instance provided """ try: workflowSpec.payload.operate( _CreateDatasetOperator(self.dbs, workflowSpec)) except DbsException as ex: msg = "Error in DBSWriter.createDatasets\n" msg += "For Workflow: %s\n" % workflowSpec.workflowName() msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) return def insertFilesForDBSBuffer(self, files, procDataset, algos, jobType="NotMerge", insertDetectorData=False, maxFiles=100, maxSize=99999999, timeOut=None, fileCommitLength=5): """ _insertFiles_ list of files inserted in DBS """ #TODO: Whats the purpose of insertDetectorData if len(files) < 1: return affectedBlocks = [] insertFiles = [] addedRuns = [] seName = None #Get the algos in insertable form # logging.error("About to input algos") # logging.error(algos) ialgos = [ DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos ] #print ialgos for outFile in files: # // # // Convert each file into a DBS File object #// lumiList = [] #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file for runlumiinfo in outFile.getRuns(): lrun = long(runlumiinfo.run) run = DbsRun( RunNumber=lrun, NumberOfEvents=0, NumberOfLumiSections=0, TotalLuminosity=0, StoreNumber=0, 
StartOfRun=0, EndOfRun=0, ) #Only added if not added by another file in this loop, why waste a call to DBS if lrun not in addedRuns: self.dbs.insertRun(run) addedRuns.append( lrun) #save it so we do not try to add it again to DBS logging.debug("run %s added to DBS " % str(lrun)) for alsn in runlumiinfo: lumi = DbsLumiSection( LumiSectionNumber=long(alsn), StartEventNumber=0, EndEventNumber=0, LumiStartTime=0, LumiEndTime=0, RunNumber=lrun, ) lumiList.append(lumi) logging.debug("lumi list created for the file") dbsfile = DbsFile( #Checksum = str(outFile['cksum']), NumberOfEvents=outFile['events'], LogicalFileName=outFile['lfn'], FileSize=int(outFile['size']), Status="VALID", ValidationStatus='VALID', FileType='EDM', Dataset=procDataset, TierList=DBSWriterObjects.makeTierList( procDataset['Path'].split('/')[3]), AlgoList=ialgos, LumiList=lumiList, ParentList=outFile.getParentLFNs(), #BranchHash = outFile['BranchHash'], ) #Set checksums by hand #dbsfile['Checksum'] = 0 #Set a default? for entry in outFile['checksums'].keys(): #This should be a dictionary with a cktype key and cksum value if entry.lower() == 'cksum': dbsfile['Checksum'] = str(outFile['checksums'][entry]) elif entry.lower() == 'adler32': dbsfile['Adler32'] = str(outFile['checksums'][entry]) elif entry.lower() == 'md5': dbsfile['Md5'] = str(outFile['checksums'][entry]) #This check comes from ProdAgent, not sure if its required if len(outFile["locations"]) > 0: seName = list(outFile["locations"])[0] logging.debug("SEname associated to file is: %s" % seName) else: msg = "Error in DBSWriter.insertFiles\n" msg += "No SEname associated to file" #print "FAKING seName for now" #seName="cmssrm.fnal.gov" raise DBSWriterError(msg) insertFiles.append(dbsfile) # //Processing Jobs: # // Insert the lists of sorted files into the appropriate #// fileblocks sumSize = 0 sumFiles = 0 tmpFiles = [] blockList = [] #First, get the block. 
See if the block already exists try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, seName) fileBlock['files'] = [] #if not fileBlock in affectedBlocks: # affectedBlocks.append(fileBlock) except DbsException as ex: msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset['Path'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) filesToCommit = [] for file in insertFiles: # First see if the block is full if self.manageFileBlock(fileBlock=fileBlock, maxFiles=maxFiles, maxSize=maxSize, timeOut=timeOut, algos=ialgos, filesToCommit=filesToCommit, procDataset=procDataset): fileBlock['OpenForWriting'] = 0 if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) # Then we need a new block try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, seName) fileBlock['files'] = [] except DbsException as ex: msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset['Path'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) fileBlock['files'].append(file['LogicalFileName']) filesToCommit.append(file) if len(filesToCommit) >= fileCommitLength: # Only commit the files if there are more of them then the maximum length try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] logging.debug("Inserted files: %s to FileBlock: %s" \ % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ( [x['LogicalFileName'] for x in insertFiles], ) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] logging.debug("Inserted files: %s to FileBlock: %s" \ % ( ([ x['LogicalFileName'] for x in insertFiles 
]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ([x['LogicalFileName'] for x in insertFiles], ) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) ## Do bulk inserts now for DBS #filesToCommit = [] #count = 0 #count2 = 0 #for file in insertFiles: # count += 1 # #Try and close the box # logging.error("Should have a file") # logging.error(len(filesToCommit)) # count2 += len(filesToCommit) # if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles, # maxSize = maxSize, timeOut = timeOut, algos = ialgos, # filesToCommit = filesToCommit, procDataset = procDataset): # fileBlock['OpenForWriting'] = '0' # if not fileBlock in affectedBlocks: # affectedBlocks.append(fileBlock) # # # # # Then we need a new block # try: # fileBlock = DBSWriterObjects.getDBSFileBlock( # self.dbs, # procDataset, # seName) # fileBlock['files'] = [] # except DbsException, ex: # msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" # msg += "Cannot retrieve FileBlock for dataset:\n" # msg += " %s\n" % procDataset['Path'] # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # #At this point, we should commit the block as is # fileBlock['files'].append(file['LogicalFileName']) # if jobType == "MergeSpecial": # for file in fileList: # file['Block'] = fileBlock # msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file)) # logging.debug(msg) # try: # # # # # # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well) # self.dbs.insertMergedFile(file['ParentList'], # file) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert merged file:\n" # msg += " %s\n" % file['LogicalFileName'] # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # logging.debug("Inserted merged file: 
%s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name'])) # else: # filesToCommit.append(file) # if len(filesToCommit) >= fileCommitLength: # # Only commit the files if there are more of them then the maximum length # try: # logging.error("About to commit %i files" %(len(filesToCommit))) # count2 += len(filesToCommit) # self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) # filesToCommit = [] # logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert processed files:\n" # msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # # # # ## If we still have files to commit, commit them #logging.error("Got to the end of the loop") #logging.error(len(filesToCommit)) #logging.error(count2) #if len(filesToCommit) > 0: # try: # logging.error("About to insert some files") # self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) # filesToCommit = [] # logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert processed files:\n" # msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) return list(affectedBlocks) def insertFiles(self, fwkJobRep, insertDetectorData=False): """ _insertFiles_ Process the files in the FwkJobReport instance and insert them into the associated datasets A list of affected fileblock names is returned both for merged and unmerged fileblocks. Only merged blocks will have to be managed. #for merged file #blocks to facilitate management of those blocks. 
#This list is not populated for processing jobs since we dont really #care about the processing job blocks. """ insertLists = {} orderedHashes = [] affectedBlocks = set() if len(fwkJobRep.files) <= 0: msg = "Error in DBSWriter.insertFiles\n" msg += "No files found in FrameWorkJobReport for:\n" msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId msg += " Workflow: %s" % fwkJobRep.workflowSpecId raise DBSWriterError(msg) for outFile in fwkJobRep.sortFiles(): # // # // Convert each file into a DBS File object #// seName = None if "SEName" in outFile: if outFile['SEName']: seName = outFile['SEName'] logging.debug("SEname associated to file is: %s" % seName) ## remove the fallback to site se-name if no SE is associated to File ## because it's likely that there is some stage out problem if there ## is no SEName associated to the file. # if not seName: # if fwkJobRep.siteDetails.has_key("se-name"): # seName = fwkJobRep.siteDetails['se-name'] # seName = str(seName) # logging.debug("site SEname: %s"%seName) if not seName: msg = "Error in DBSWriter.insertFiles\n" msg += "No SEname associated to files in FrameWorkJobReport for " # msg += "No SEname found in FrameWorkJobReport for " msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId msg += " Workflow: %s" % fwkJobRep.workflowSpecId raise DBSWriterError(msg) try: if (insertDetectorData): dbsFiles = DBSWriterObjects.createDBSFiles( outFile, fwkJobRep.jobType, self.dbs) else: dbsFiles = DBSWriterObjects.createDBSFiles( outFile, fwkJobRep.jobType) except DbsException as ex: msg = "Error in DBSWriter.insertFiles:\n" msg += "Error creating DbsFile instances for file:\n" msg += "%s\n" % outFile['LFN'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if len(dbsFiles) <= 0: msg = "No DbsFile instances created. 
Not enough info in the FrameWorkJobReport for" msg += "==> JobSpecId: %s" % fwkJobRep.jobSpecId msg += " Workflow: %s" % fwkJobRep.workflowSpecId raise DBSWriterError(msg) for f in dbsFiles: datasetName = makeDBSDSName(f) hashName = "%s-%s" % (seName, datasetName) if hashName not in insertLists: insertLists[hashName] = _InsertFileList( seName, datasetName) insertLists[hashName].append(f) if not orderedHashes.count(hashName): orderedHashes.append(hashName) # //Processing Jobs: # // Insert the lists of sorted files into the appropriate #// fileblocks for hash in orderedHashes: fileList = insertLists[hash] procDataset = fileList[0]['Dataset'] try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, fileList.seName) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset msg += "In Storage Element:\n %s\n" % fileList.seName msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if fwkJobRep.jobType == "Merge": # // # // Merge files #// for mergedFile in fileList: mergedFile['Block'] = fileBlock affectedBlocks.add(fileBlock['Name']) msg = "calling: self.dbs.insertMergedFile(%s, %s)" % (str( mergedFile['ParentList']), str(mergedFile)) logging.debug(msg) try: self.dbs.insertMergedFile(mergedFile['ParentList'], mergedFile) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert merged file:\n" msg += " %s\n" % mergedFile['LogicalFileName'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) logging.debug( "Inserted merged file: %s to FileBlock: %s" % (mergedFile['LogicalFileName'], fileBlock['Name'])) else: # // # // Processing files #// affectedBlocks.add(fileBlock['Name']) msg = "calling: self.dbs.insertFiles(%s, %s, %s)" % ( str(procDataset), str(list(fileList)), str(fileBlock)) logging.debug(msg) try: self.dbs.insertFiles(procDataset, list(fileList), fileBlock) except DbsException as ex: msg = "Error in 
DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ([x['LogicalFileName'] for x in fileList], ) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) logging.debug("Inserted files: %s to FileBlock: %s" % (([x['LogicalFileName'] for x in fileList]), fileBlock['Name'])) return list(affectedBlocks) def manageFileBlock(self, fileBlock, maxFiles=100, maxSize=None, timeOut=None, algos=[], filesToCommit=[], procDataset=None): """ _manageFileBlock_ Check to see wether the fileblock with the provided name is closeable based on number of files or total size. If the block equals or exceeds wither the maxFiles or maxSize parameters, close the block and return True, else do nothing and return False """ # // # // Check that the block exists, and is open before we close it #// fileblockName = fileBlock['Name'] blockInstance = self.dbs.listBlocks(block_name=fileblockName) if len(blockInstance) > 1: msg = "Multiple Blocks matching name: %s\n" % fileblockName msg += "Unable to manage file block..." 
raise DBSWriterError(msg) if len(blockInstance) == 0: msg = "Block name %s not found\n" % fileblockName msg += "Cant manage a non-existent fileblock" raise DBSWriterError(msg) blockInstance = blockInstance[0] isClosed = blockInstance.get('OpenForWriting', '1') if isClosed != '1': msg = "Block %s already closed" % fileblockName logging.warning(msg) # Now we need to commit files if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" raise DBSWriterError(msg) # Attempting to migrate to global if self.globalDBSUrl: self.dbs.dbsMigrateBlock( srcURL=self.args['url'], dstURL=self.globalDBSUrl, block_name=fileblockName, srcVersion=self.version, dstVersion=self.globalVersion, ) #for algo in algos: # self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]), # algorithm = algo) logging.info( "Migrated block %s to global due to pre-closed status" % (fileblockName)) else: logging.error( "Should've migrated block %s because it was already closed, but didn't" % (fileblockName)) return True # // # // We have an open block, sum number of files and file sizes #// #fileCount = int(blockInstance.get('NumberOfFiles', 0)) fileCount = len(fileBlock['files']) totalSize = float(blockInstance.get('BlockSize', 0)) msg = "Fileblock: %s\n ==> Size: %s Files: %s\n" % ( fileblockName, totalSize, fileCount) logging.warning(msg) # // # // Test close block conditions #// closeBlock = False if timeOut: if int(time.time()) - int(blockInstance['CreationDate']) > timeOut: closeBlock = True msg = "Closing Block based on timeOut: %s" % fileblockName logging.debug(msg) if fileCount >= maxFiles: closeBlock = True msg = "Closing Block Based on files: %s" % fileblockName logging.debug(msg) if maxSize != None: if totalSize >= maxSize: closeBlock = True msg = "Closing Block Based on size: %s" % fileblockName logging.debug(msg) 
if closeBlock: # Now we need to commit files if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] #logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" #msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) # // # // Close the block #// self.dbs.closeBlock( DBSWriterObjects.createDBSFileBlock(fileblockName)) if self.globalDBSUrl: self.dbs.dbsMigrateBlock(srcURL=self.args['url'], dstURL=self.globalDBSUrl, block_name=fileblockName, srcVersion=self.version, dstVersion=self.globalVersion) for algo in algos: pass #self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]), # algorithm = algo) logging.info("Migrated block %s to global" % (fileblockName)) else: logging.error("Should've migrated block %s, but didn't" % (fileblockName)) return closeBlock def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks): """ _migrateDatasetBlocks_ Migrate the list of fileblocks provided by blocks, belonging to the dataset specified by the dataset path to this DBS instance from the inputDBSUrl provided - *inputDBSUrl* : URL for connection to input DBS - *datasetPath* : Name of dataset in input DBS (must exist in input DBS) - *blocks* : list of block names to be migrated (must exist in input DBS) """ if len(blocks) == 0: msg = "FileBlocks not provided.\n" msg += "You must provide the name of at least one fileblock\n" msg += "to be migrated" raise DBSWriterError(msg) # // # // Hook onto input DBSUrl and verify that the dataset & blocks #// exist reader = DBSReader(inputDBSUrl) inputBlocks = reader.listFileBlocks(datasetPath) for block in blocks: # // # // Test block exists at source #// if block not in inputBlocks: msg = "Block name:\n ==> %s\n" % block 
msg += "Not found in input dataset:\n ==> %s\n" % datasetPath msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl raise DBSWriterError(msg) # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if not self.reader.blockIsOpen(block): msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Migration of that block" logging.warning(msg) continue try: xferData = reader.dbs.listDatasetContents(datasetPath, block) except DbsException as ex: msg = "Error in DBSWriter.migrateDatasetBlocks\n" msg += "Could not read content of dataset:\n ==> %s\n" % ( datasetPath, ) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) xferData = _remapBlockParentage(datasetPath, xferData) try: self.dbs.insertDatasetContents(xferData) except DbsException as ex: msg = "Error in DBSWriter.migrateDatasetBlocks\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( datasetPath, ) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) del xferData return def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed=True): """ _importDataset_ Import a dataset into the local scope DBS. It complains if the parent dataset ar not there!! 
- *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations=False) for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if not str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str( inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for sename in locations: self.dbs.addReplicaToBlock(block, sename) logging.info(sename) continue try: xferData = reader.dbs.listDatasetContents( sourceDatasetPath, block) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Could not read content of dataset:\n ==> %s\n" % ( sourceDatasetPath, ) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) try: self.dbs.insertDatasetContents(xferData) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath, ) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) del xferData locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg 
+= "Block has no locations defined: %s" % block raise DBSWriterError(msg) for sename in locations: self.dbs.addReplicaToBlock(block, sename) return def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed=True): """ _importDataset_ Import a dataset into the local scope DBS with full parentage hirerarchy (at least not slow because branches info is dropped) - *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations=False) blkCounter = 0 for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// blkCounter = blkCounter + 1 msg = "Importing block %s of %s: %s " % (blkCounter, len(inputBlocks), block) logging.debug(msg) if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str( inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDataset\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for sename in locations: self.dbs.addReplicaToBlock(block, sename) logging.info(sename) continue try: self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly=False) except DbsException as ex: msg = "Error in DBSWriter.importDataset\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath, ) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) locations = 
reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDataset\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) for sename in locations: self.dbs.addReplicaToBlock(block, sename) return def importDatasetWithoutParentage(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed=True): """ _importDataset_ Import a dataset into the local scope DBS with one level parentage, however it has severe limitation on its use due to the "ReadOnly" concept. - *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations=False) for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str( inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for sename in locations: self.dbs.addReplicaToBlock(block, sename) logging.info(sename) continue try: self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly=True) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath, ) 
msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) for sename in locations: self.dbs.addReplicaToBlock(block, sename) return def getOutputDatasetsWithPSet(payloadNode): """ _getOutputDatasetsWithPSet_ Extract all the information about output datasets from the payloadNode object provided, including the {{}} format PSet cfg Returns a list of DatasetInfo objects including App details from the node. """ result = [] for item in payloadNode._OutputDatasets: resultEntry = DatasetInfo() resultEntry.update(item) resultEntry["ApplicationName"] = payloadNode.application[ 'Executable'] resultEntry["ApplicationProject"] = payloadNode.application[ 'Project'] resultEntry["ApplicationVersion"] = payloadNode.application[ 'Version'] resultEntry["ApplicationFamily"] = item.get( "OutputModuleName", "AppFamily") try: config = payloadNode.cfgInterface psetStr = config.originalContent() resultEntry['PSetContent'] = psetStr except Exception as ex: resultEntry['PSetContent'] = None result.append(resultEntry) return _sortDatasets(result)
#!/usr/bin/env python
#
# Revision: 1.3 $"
# Id: DBSXMLParser.java,v 1.3 2006/10/26 18:26:04 afaq Exp $"
#
# API Unit tests for the DBS JavaServer.
#
# For every dataset path returned by the DBS instance selected through the
# command-line options, print the dataset path, the result of
# listPathParents() for it, each block name, and the name of every parent
# block reported for that block.
import sys
from DBSAPI.dbsApi import DbsApi
from DBSAPI.dbsException import *
from DBSAPI.dbsApiException import *
from DBSAPI.dbsOptions import DbsOptionParser

# NOTE: every print below takes exactly one parenthesised argument, so the
# line means the same thing as a Python 2 print statement and as a Python 3
# print() call.
try:
    optManager = DbsOptionParser()
    (opts, args) = optManager.getOpt()
    api = DbsApi(opts.__dict__)

    for dataset in api.listDatasetPaths():
        print("\n %s" % str(dataset))
        # Call listPathParents for this dataset; formatting the attribute
        # without calling it only prints the bound-method repr.
        print("Dataset parent: %s" % str(api.listPathParents(dataset)))
        for block in api.listBlocks(dataset):
            print("block: %s" % str(block['Name']))
            for parent in api.listBlockParents(block_name=block['Name']):
                print("Parent: %s" % str(parent['Name']))

except DbsApiException as ex:
    print("Caught API Exception %s: %s " % (ex.getClassName(),
                                            ex.getErrorMessage()))
    if ex.getErrorCode() not in (None, ""):
        # Two spaces after the colon reproduce what the comma-separated
        # print statement used to emit; the 1-tuple keeps %-formatting safe
        # whatever type the error code has.
        print("DBS Exception Error Code:  %s" % (ex.getErrorCode(),))
# Script fragment: for each selected dataset, fetch every block's contents
# from DBS and print a "-- SQL Statements ..." header line for it.
# NOTE(review): the imports for DbsOptionParser, DbsApi and xml.sax.handler
# are not visible in this part of the file -- presumably they appear earlier.

# Build the DBS API client from the parsed command-line options.
optManager = DbsOptionParser()
(opts,args) = optManager.getOpt()
api = DbsApi(opts.__dict__)

# Dataset paths to process; the commented-out entries are skipped.
datasets=[
    #"/Cosmics/Commissioning08-v1/RAW",
    #"/TTbar/Summer09-MC_31X_V3-v1/GEN-SIM-RAW",
    "/TTbar/Summer09-MC_31X_V3-v1/GEN-SIM-RECO",
    "/BeamHalo/Summer09-STARTUP31X_V7_StreamMuAlBeamHaloOverlaps-v1/ALCARECO",
    "/InclusiveMu15/Summer09-MC_31X_V3_7TeV-v1/GEN-SIM-RAW",
    "/Cosmics/CMSSW_3_2_7-CRAFT09_R_V4_CosmicsSeq-v1/RECO",
    "/Wmunu/Summer09-MC_31X_V3_7TeV_SD_L1_L2_Mu-v1/GEN-SIM-RECO",
    "/Wmunu/Summer09-MC_31X_V3_7TeV_SD_Mu9-v1/GEN-SIM-RECO"
    ]
# Single-dataset override kept for quick manual runs.
#datasets=["/TTbar/Summer09-MC_31X_V3-v1/GEN-SIM-RAW"]

for dataset in datasets :
    blocks=api.listBlocks(dataset)
    for ablock in blocks:
        # Contents of this one block; `data` is not used again in the
        # visible part of the script.
        data=api.listDatasetContents(dataset, ablock["Name"])
        #print data
        print "-- SQL Statements for Dataset : %s and Block : %s " % (dataset, ablock["Name"])
        # NOTE(review): the source was whitespace-flattened, so placing this
        # class inside the per-block loop is a reconstruction -- confirm
        # against the original file.
        class Handler (xml.sax.handler.ContentHandler):
            # SAX content handler subclass. Only __init__ is visible here;
            # the class may continue beyond this fragment -- TODO confirm.
            def __init__(self):
                # self.sqls maps a key to an initially empty list, one list
                # per key below. Presumably each list collects the SQL
                # statements for the table named by its key -- verify
                # against the code that fills them.
                self.sqls={}
                self.sqls['paths']=[]
                self.sqls['storage_element']=[]
                self.sqls['block_storage_elements']=[]
                self.sqls['file']=[]
                self.sqls['app_version']=[]
                self.sqls['app_executable_name']=[]
                self.sqls['ps_hash']=[]
from DBSAPI.dbsApi import DbsApi
from DBSAPI.dbsException import *
from DBSAPI.dbsApiException import *
from DBSAPI.dbsOptions import DbsOptionParser

# Print every block that DBS reports for one dataset path.
#
# Other listBlocks() queries that have been tried from this script:
#   api.listBlocks("/TestPrimary_001_20070315_03h12m26s/TestProcessed_20070315_03h12m26s/GEN-SIM")
#   api.listBlocks("/test_primary_001/TestProcessedDS001/GEN-SIM")
#   api.listBlocks("/chi1/CMSSW_1_6_7-CSA07-3268/GEN-SIM-DIGI-RAW")
#   api.listBlocks("/dataset_PD_110/CRUZET3-v1-unmerged/RAW")
#   api.listBlocks(block_name="/test_primary_001*")
#   api.listBlocks("", "/TestPrimary_001_20070315_02h26m11s/TestProcessed_20070315_02h26m11s/GEN-SIM#016712")
#   api.listBlocks("/test_primary_001/TestProcessedDS001/GEN-SIM", "/test_primary_001/TestProcessedDS001/GEN*")
#   api.listBlocks("/TestPrimary_001_20070315_02h53m32s/TestPrimary_001_20070315_02h53m32s/GEN-SIM")
#
# Alternative per-block output formats:
#   "%s  %s" % (block['Name'], block['StorageElementList'])
#   "  %s" % block['Name']
#
# Every print below takes exactly one parenthesised argument, so it behaves
# the same as the equivalent print statement.
try:
    optManager = DbsOptionParser()
    opts, args = optManager.getOpt()
    # Debug aid: dump opts.__dict__ here to inspect the parsed options.
    api = DbsApi(opts.__dict__)

    datasetPath = "/RelValSingleMuPt10/CMSSW_2_1_2_IDEAL_V6_v3/GEN-SIM-RECO"
    for block in api.listBlocks(datasetPath):
        print(" %s" % block)

except DbsApiException as ex:
    print("Caught API Exception %s: %s " % (ex.getClassName(),
                                            ex.getErrorMessage()))
    errorCode = ex.getErrorCode()
    if not (errorCode is None or errorCode == ""):
        # Two spaces after the colon match the output of the original
        # comma-separated print.
        print("DBS Exception Error Code:  %s" % (errorCode,))
class DBSWriter: """ _DBSWriter_ General API for writing data to DBS """ def __init__(self, url, **contact): args = { "url" : url, "level" : 'ERROR'} args.update(contact) try: self.dbs = DbsApi(args) self.args = args self.version = args.get('version', None) self.globalDBSUrl = args.get('globalDBSUrl', None) self.globalVersion = args.get('globalVersion', None) if self.globalDBSUrl: globalArgs = {'url': url, 'level': 'ERROR'} globalArgs.update(contact) self.globalDBS = DbsApi(globalArgs) except DbsException as ex: msg = "Error in DBSWriterError with DbsApi\n" msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) self.reader = DBSReader(**args) def createDatasets(self, workflowSpec): """ _createDatasets_ Create All the output datasets found in the workflow spec instance provided """ try: workflowSpec.payload.operate( _CreateDatasetOperator(self.dbs, workflowSpec) ) except DbsException as ex: msg = "Error in DBSWriter.createDatasets\n" msg += "For Workflow: %s\n" % workflowSpec.workflowName() msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) return def insertFilesForDBSBuffer(self, files, procDataset, algos, jobType = "NotMerge", insertDetectorData = False, maxFiles = 100, maxSize = 99999999, timeOut = None, fileCommitLength = 5): """ _insertFiles_ list of files inserted in DBS """ #TODO: Whats the purpose of insertDetectorData if len(files) < 1: return affectedBlocks = [] insertFiles = [] addedRuns=[] pnn = None #Get the algos in insertable form # logging.error("About to input algos") # logging.error(algos) ialgos = [DBSWriterObjects.createAlgorithmForInsert(dict(algo)) for algo in algos ] #print ialgos for outFile in files: # // # // Convert each file into a DBS File object #// lumiList = [] #Somehing similar should be the real deal when multiple runs/lumi could be returned from wmbs file for runlumiinfo in outFile.getRuns(): lrun=long(runlumiinfo.run) run = DbsRun( RunNumber = lrun, NumberOfEvents = 0, NumberOfLumiSections = 0, TotalLuminosity = 0, StoreNumber 
= 0, StartOfRun = 0, EndOfRun = 0, ) #Only added if not added by another file in this loop, why waste a call to DBS if lrun not in addedRuns: self.dbs.insertRun(run) addedRuns.append(lrun) #save it so we do not try to add it again to DBS logging.debug("run %s added to DBS " % str(lrun)) for alsn in runlumiinfo: lumi = DbsLumiSection( LumiSectionNumber = long(alsn), StartEventNumber = 0, EndEventNumber = 0, LumiStartTime = 0, LumiEndTime = 0, RunNumber = lrun, ) lumiList.append(lumi) logging.debug("lumi list created for the file") dbsfile = DbsFile( #Checksum = str(outFile['cksum']), NumberOfEvents = outFile['events'], LogicalFileName = outFile['lfn'], FileSize = int(outFile['size']), Status = "VALID", ValidationStatus = 'VALID', FileType = 'EDM', Dataset = procDataset, TierList = DBSWriterObjects.makeTierList(procDataset['Path'].split('/')[3]), AlgoList = ialgos, LumiList = lumiList, ParentList = outFile.getParentLFNs(), #BranchHash = outFile['BranchHash'], ) #Set checksums by hand #dbsfile['Checksum'] = 0 #Set a default? for entry in outFile['checksums'].keys(): #This should be a dictionary with a cktype key and cksum value if entry.lower() == 'cksum': dbsfile['Checksum'] = str(outFile['checksums'][entry]) elif entry.lower() == 'adler32': dbsfile['Adler32'] = str(outFile['checksums'][entry]) elif entry.lower() == 'md5': dbsfile['Md5'] = str(outFile['checksums'][entry]) #This check comes from ProdAgent, not sure if its required if len(outFile["locations"]) > 0: pnn = list(outFile["locations"])[0] logging.debug("PNN associated to file is: %s"%pnn) else: msg = "Error in DBSWriter.insertFiles\n" msg += "No PNN associated to file" #print "FAKING seName for now" #seName="cmssrm.fnal.gov" raise DBSWriterError(msg) insertFiles.append(dbsfile) # //Processing Jobs: # // Insert the lists of sorted files into the appropriate #// fileblocks sumSize = 0 sumFiles = 0 tmpFiles = [] blockList = [] #First, get the block. 
See if the block already exists try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, pnn) fileBlock['files'] = [] #if not fileBlock in affectedBlocks: # affectedBlocks.append(fileBlock) except DbsException as ex: msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset['Path'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) filesToCommit = [] for file in insertFiles: # First see if the block is full if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles, maxSize = maxSize, timeOut = timeOut, algos = ialgos, filesToCommit = filesToCommit, procDataset = procDataset): fileBlock['OpenForWriting'] = 0 if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) # Then we need a new block try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, pnn) fileBlock['files'] = [] except DbsException as ex: msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset['Path'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) fileBlock['files'].append(file['LogicalFileName']) filesToCommit.append(file) if len(filesToCommit) >= fileCommitLength: # Only commit the files if there are more of them then the maximum length try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] logging.debug("Inserted files: %s to FileBlock: %s" \ % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] logging.debug("Inserted files: %s to FileBlock: %s" \ % ( ([ x['LogicalFileName'] for x in insertFiles 
]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) ## Do bulk inserts now for DBS #filesToCommit = [] #count = 0 #count2 = 0 #for file in insertFiles: # count += 1 # #Try and close the box # logging.error("Should have a file") # logging.error(len(filesToCommit)) # count2 += len(filesToCommit) # if self.manageFileBlock(fileBlock = fileBlock, maxFiles = maxFiles, # maxSize = maxSize, timeOut = timeOut, algos = ialgos, # filesToCommit = filesToCommit, procDataset = procDataset): # fileBlock['OpenForWriting'] = '0' # if not fileBlock in affectedBlocks: # affectedBlocks.append(fileBlock) # # # # # Then we need a new block # try: # fileBlock = DBSWriterObjects.getDBSFileBlock( # self.dbs, # procDataset, # seName) # fileBlock['files'] = [] # except DbsException, ex: # msg = "Error in DBSWriter.insertFilesForDBSBuffer\n" # msg += "Cannot retrieve FileBlock for dataset:\n" # msg += " %s\n" % procDataset['Path'] # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # #At this point, we should commit the block as is # fileBlock['files'].append(file['LogicalFileName']) # if jobType == "MergeSpecial": # for file in fileList: # file['Block'] = fileBlock # msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(file['ParentList']),str(file)) # logging.debug(msg) # try: # # # # # # NOTE To Anzar From Anzar (File cloning as in DBS API can be done here and then I can use Bulk insert on Merged files as well) # self.dbs.insertMergedFile(file['ParentList'], # file) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert merged file:\n" # msg += " %s\n" % file['LogicalFileName'] # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # logging.debug("Inserted merged file: 
%s to FileBlock: %s"%(file['LogicalFileName'],fileBlock['Name'])) # else: # filesToCommit.append(file) # if len(filesToCommit) >= fileCommitLength: # # Only commit the files if there are more of them then the maximum length # try: # logging.error("About to commit %i files" %(len(filesToCommit))) # count2 += len(filesToCommit) # self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) # filesToCommit = [] # logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert processed files:\n" # msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) # # # # ## If we still have files to commit, commit them #logging.error("Got to the end of the loop") #logging.error(len(filesToCommit)) #logging.error(count2) #if len(filesToCommit) > 0: # try: # logging.error("About to insert some files") # self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) # filesToCommit = [] # logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) # # except DbsException, ex: # msg = "Error in DBSWriter.insertFiles\n" # msg += "Cannot insert processed files:\n" # msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) # msg += "%s\n" % formatEx(ex) # raise DBSWriterError(msg) if not fileBlock in affectedBlocks: affectedBlocks.append(fileBlock) return list(affectedBlocks) def insertFiles(self, fwkJobRep, insertDetectorData = False): """ _insertFiles_ Process the files in the FwkJobReport instance and insert them into the associated datasets A list of affected fileblock names is returned both for merged and unmerged fileblocks. Only merged blocks will have to be managed. #for merged file #blocks to facilitate management of those blocks. 
#This list is not populated for processing jobs since we dont really #care about the processing job blocks. """ insertLists = {} orderedHashes = [] affectedBlocks = set() if len(fwkJobRep.files)<=0: msg = "Error in DBSWriter.insertFiles\n" msg += "No files found in FrameWorkJobReport for:\n" msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId msg += " Workflow: %s"%fwkJobRep.workflowSpecId raise DBSWriterError(msg) for outFile in fwkJobRep.sortFiles(): # // # // Convert each file into a DBS File object #// pnn = None if "PNN" in outFile: if outFile['PNN'] : pnn = outFile['PNN'] logging.debug("PNN associated to file is: %s"%pnn) ## remove the fallback to site se-name if no SE is associated to File ## because it's likely that there is some stage out problem if there ## is no SEName associated to the file. # if not seName: # if fwkJobRep.siteDetails.has_key("se-name"): # seName = fwkJobRep.siteDetails['se-name'] # seName = str(seName) # logging.debug("site SEname: %s"%seName) if not pnn: msg = "Error in DBSWriter.insertFiles\n" msg += "No PNN associated to files in FrameWorkJobReport for " # msg += "No SEname found in FrameWorkJobReport for " msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId msg += " Workflow: %s"%fwkJobRep.workflowSpecId raise DBSWriterError(msg) try: if ( insertDetectorData ): dbsFiles = DBSWriterObjects.createDBSFiles(outFile, fwkJobRep.jobType, self.dbs) else: dbsFiles = DBSWriterObjects.createDBSFiles(outFile, fwkJobRep.jobType) except DbsException as ex: msg = "Error in DBSWriter.insertFiles:\n" msg += "Error creating DbsFile instances for file:\n" msg += "%s\n" % outFile['LFN'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if len(dbsFiles)<=0: msg="No DbsFile instances created. 
Not enough info in the FrameWorkJobReport for" msg += "==> JobSpecId: %s"%fwkJobRep.jobSpecId msg += " Workflow: %s"%fwkJobRep.workflowSpecId raise DBSWriterError(msg) for f in dbsFiles: datasetName = makeDBSDSName(f) hashName = "%s-%s" % (pnn, datasetName) if hashName not in insertLists: insertLists[hashName] = _InsertFileList(pnn, datasetName) insertLists[hashName].append(f) if not orderedHashes.count(hashName): orderedHashes.append(hashName) # //Processing Jobs: # // Insert the lists of sorted files into the appropriate #// fileblocks for hash in orderedHashes: fileList = insertLists[hash] procDataset = fileList[0]['Dataset'] try: fileBlock = DBSWriterObjects.getDBSFileBlock( self.dbs, procDataset, fileList.pnn) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot retrieve FileBlock for dataset:\n" msg += " %s\n" % procDataset # msg += "In Storage Element:\n %s\n" % fileList.seName msg += "In PNN:\n %s\n" % fileList.pnn msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) if fwkJobRep.jobType == "Merge": # // # // Merge files #// for mergedFile in fileList: mergedFile['Block'] = fileBlock affectedBlocks.add(fileBlock['Name']) msg="calling: self.dbs.insertMergedFile(%s, %s)" % (str(mergedFile['ParentList']),str(mergedFile)) logging.debug(msg) try: self.dbs.insertMergedFile(mergedFile['ParentList'], mergedFile) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert merged file:\n" msg += " %s\n" % mergedFile['LogicalFileName'] msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) logging.debug("Inserted merged file: %s to FileBlock: %s"%(mergedFile['LogicalFileName'],fileBlock['Name'])) else: # // # // Processing files #// affectedBlocks.add(fileBlock['Name']) msg="calling: self.dbs.insertFiles(%s, %s, %s)" % (str(procDataset),str(list(fileList)),str(fileBlock)) logging.debug(msg) try: self.dbs.insertFiles(procDataset, list(fileList), fileBlock) except DbsException as ex: msg = "Error in 
DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" msg += " %s\n" % ( [ x['LogicalFileName'] for x in fileList ], ) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) logging.debug("Inserted files: %s to FileBlock: %s"%( ([ x['LogicalFileName'] for x in fileList ]),fileBlock['Name'])) return list(affectedBlocks) def manageFileBlock(self, fileBlock, maxFiles = 100, maxSize = None, timeOut = None, algos = [], filesToCommit = [], procDataset = None): """ _manageFileBlock_ Check to see wether the fileblock with the provided name is closeable based on number of files or total size. If the block equals or exceeds wither the maxFiles or maxSize parameters, close the block and return True, else do nothing and return False """ # // # // Check that the block exists, and is open before we close it #// fileblockName = fileBlock['Name'] blockInstance = self.dbs.listBlocks(block_name=fileblockName) if len(blockInstance) > 1: msg = "Multiple Blocks matching name: %s\n" % fileblockName msg += "Unable to manage file block..." 
raise DBSWriterError(msg) if len(blockInstance) == 0: msg = "Block name %s not found\n" % fileblockName msg += "Cant manage a non-existent fileblock" raise DBSWriterError(msg) blockInstance = blockInstance[0] isClosed = blockInstance.get('OpenForWriting', '1') if isClosed != '1': msg = "Block %s already closed" % fileblockName logging.warning(msg) # Now we need to commit files if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" raise DBSWriterError(msg) # Attempting to migrate to global if self.globalDBSUrl: self.dbs.dbsMigrateBlock(srcURL = self.args['url'], dstURL = self.globalDBSUrl, block_name = fileblockName, srcVersion = self.version, dstVersion = self.globalVersion, ) #for algo in algos: # self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]), # algorithm = algo) logging.info("Migrated block %s to global due to pre-closed status" %(fileblockName)) else: logging.error("Should've migrated block %s because it was already closed, but didn't" % (fileblockName)) return True # // # // We have an open block, sum number of files and file sizes #// #fileCount = int(blockInstance.get('NumberOfFiles', 0)) fileCount = len(fileBlock['files']) totalSize = float(blockInstance.get('BlockSize', 0)) msg = "Fileblock: %s\n ==> Size: %s Files: %s\n" % ( fileblockName, totalSize, fileCount) logging.warning(msg) # // # // Test close block conditions #// closeBlock = False if timeOut: if int(time.time()) - int(blockInstance['CreationDate']) > timeOut: closeBlock = True msg = "Closing Block based on timeOut: %s" % fileblockName logging.debug(msg) if fileCount >= maxFiles: closeBlock = True msg = "Closing Block Based on files: %s" % fileblockName logging.debug(msg) if maxSize != None: if totalSize >= maxSize: closeBlock = True msg = "Closing Block Based on size: %s" % fileblockName 
logging.debug(msg) if closeBlock: # Now we need to commit files if len(filesToCommit) > 0: try: self.dbs.insertFiles(procDataset, filesToCommit, fileBlock) filesToCommit = [] #logging.debug("Inserted files: %s to FileBlock: %s" \ # % ( ([ x['LogicalFileName'] for x in insertFiles ]),fileBlock['Name'])) except DbsException as ex: msg = "Error in DBSWriter.insertFiles\n" msg += "Cannot insert processed files:\n" #msg += " %s\n" % ([ x['LogicalFileName'] for x in insertFiles ],) msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) # // # // Close the block #// self.dbs.closeBlock( DBSWriterObjects.createDBSFileBlock(fileblockName) ) if self.globalDBSUrl: self.dbs.dbsMigrateBlock(srcURL = self.args['url'], dstURL = self.globalDBSUrl, block_name = fileblockName, srcVersion = self.version, dstVersion = self.globalVersion ) for algo in algos: pass #self.globalDBS.insertAlgoInPD(dataset = get_path(fileblockName.split('#')[0]), # algorithm = algo) logging.info("Migrated block %s to global" %(fileblockName)) else: logging.error("Should've migrated block %s, but didn't" % (fileblockName)) return closeBlock def migrateDatasetBlocks(self, inputDBSUrl, datasetPath, blocks): """ _migrateDatasetBlocks_ Migrate the list of fileblocks provided by blocks, belonging to the dataset specified by the dataset path to this DBS instance from the inputDBSUrl provided - *inputDBSUrl* : URL for connection to input DBS - *datasetPath* : Name of dataset in input DBS (must exist in input DBS) - *blocks* : list of block names to be migrated (must exist in input DBS) """ if len(blocks) == 0: msg = "FileBlocks not provided.\n" msg += "You must provide the name of at least one fileblock\n" msg += "to be migrated" raise DBSWriterError(msg) # // # // Hook onto input DBSUrl and verify that the dataset & blocks #// exist reader = DBSReader(inputDBSUrl) inputBlocks = reader.listFileBlocks(datasetPath) for block in blocks: # // # // Test block exists at source #// if block not in inputBlocks: msg = 
"Block name:\n ==> %s\n" % block msg += "Not found in input dataset:\n ==> %s\n" % datasetPath msg += "In DBS Instance:\n ==> %s\n" % inputDBSUrl raise DBSWriterError(msg) # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if not self.reader.blockIsOpen(block): msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Migration of that block" logging.warning(msg) continue try: xferData = reader.dbs.listDatasetContents(datasetPath, block) except DbsException as ex: msg = "Error in DBSWriter.migrateDatasetBlocks\n" msg += "Could not read content of dataset:\n ==> %s\n" % ( datasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) xferData = _remapBlockParentage(datasetPath, xferData) try: self.dbs.insertDatasetContents(xferData) except DbsException as ex: msg = "Error in DBSWriter.migrateDatasetBlocks\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( datasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) del xferData return def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed = True): """ _importDataset_ Import a dataset into the local scope DBS. It complains if the parent dataset ar not there!! 
- *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False) for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if not str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) logging.info(pnn) continue try: xferData = reader.dbs.listDatasetContents( sourceDatasetPath, block ) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Could not read content of dataset:\n ==> %s\n" % ( sourceDatasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) try: self.dbs.insertDatasetContents(xferData) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) del xferData locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithExistingParents\n" msg += "Block 
has no locations defined: %s" % block raise DBSWriterError(msg) for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) return def importDataset(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed = True): """ _importDataset_ Import a dataset into the local scope DBS with full parentage hirerarchy (at least not slow because branches info is dropped) - *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False) blkCounter=0 for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// blkCounter=blkCounter+1 msg="Importing block %s of %s: %s " % (blkCounter,len(inputBlocks),block) logging.debug(msg) if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDataset\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) logging.info(pnn) continue try: self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly = False) except DbsException as ex: msg = "Error in DBSWriter.importDataset\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % formatEx(ex) raise DBSWriterError(msg) locations = reader.listFileBlockLocation(block) # only 
empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDataset\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) return def importDatasetWithoutParentage(self, sourceDBS, sourceDatasetPath, targetDBS, onlyClosed = True): """ _importDataset_ Import a dataset into the local scope DBS with one level parentage, however it has severe limitation on its use due to the "ReadOnly" concept. - *sourceDBS* : URL for input DBS instance - *sourceDatasetPath* : Dataset Path to be imported - *targetDBS* : URL for DBS to have dataset imported to """ reader = DBSReader(sourceDBS) inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed, locations = False) for inputBlock in inputBlocks: block = inputBlock['Name'] # // # // Test block does not exist in target #// if self.reader.blockExists(block): # // # // block exists #// If block is closed dont attempt transfer if str(inputBlock['OpenForWriting']) != '1': msg = "Block already exists in target DBS and is closed:\n" msg += " ==> %s\n" % block msg += "Skipping Import of that block" logging.warning(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) logging.info("Update block locations to:") for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) logging.info(pnn) continue try: self.dbs.migrateDatasetContents(sourceDBS, targetDBS, sourceDatasetPath, block_name=block, noParentsReadOnly = True ) except DbsException as ex: msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Could not write content of dataset:\n ==> %s\n" % ( sourceDatasetPath,) msg += "Block name:\n ==> %s\n" % block msg += "%s\n" % 
formatEx(ex) raise DBSWriterError(msg) locations = reader.listFileBlockLocation(block) # only empty file blocks can have no location if not locations and str(inputBlock['NumberOfFiles']) != "0": msg = "Error in DBSWriter.importDatasetWithoutParentage\n" msg += "Block has no locations defined: %s" % block raise DBSWriterError(msg) for pnn in locations: self.dbs.addReplicaToBlock(block,pnn) return def getOutputDatasetsWithPSet(payloadNode): """ _getOutputDatasetsWithPSet_ Extract all the information about output datasets from the payloadNode object provided, including the {{}} format PSet cfg Returns a list of DatasetInfo objects including App details from the node. """ result = [] for item in payloadNode._OutputDatasets: resultEntry = DatasetInfo() resultEntry.update(item) resultEntry["ApplicationName"] = payloadNode.application['Executable'] resultEntry["ApplicationProject"] = payloadNode.application['Project'] resultEntry["ApplicationVersion"] = payloadNode.application['Version'] resultEntry["ApplicationFamily"] = item.get("OutputModuleName", "AppFamily") try: config = payloadNode.cfgInterface psetStr = config.originalContent() resultEntry['PSetContent'] = psetStr except Exception as ex: resultEntry['PSetContent'] = None result.append(resultEntry) return _sortDatasets(result)