def setUp(self):
    """
    _setUp_

    Build the full test fixture: Couch databases, the WMBS/DBSBuffer/BossAir/
    ResourceControl schema, a scratch work directory, and a test workload
    spec plus the DB access helpers the tests use.  The steps are
    order-dependent (logging before DB, DB before schema).
    """
    super(WMBSHelperTest, self).setUp()
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    # destroyAllDatabase=True: start every test from a clean relational DB
    self.testInit.setDatabaseConnection(destroyAllDatabase=True)
    # Couch apps used by the job/fwjr dump and the config cache
    self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
    self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
    self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
    os.environ["COUCHDB"] = "wmbshelper_t"
    self.testInit.setSchema(customModules=[
        "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir",
        "WMCore.ResourceControl"
    ], useDefault=False)
    self.workDir = self.testInit.generateWorkDir()

    # Workload spec and the dataset/DBS handles derived from its top task
    self.wmspec = self.createWMSpec()
    self.topLevelTask = getFirstTask(self.wmspec)
    self.inputDataset = self.topLevelTask.inputDataset()
    self.dataset = self.topLevelTask.getInputDatasetPath()
    self.dbs = DBSReader(self.inputDataset.dbsurl)
    # DAO factory bound to the thread-local logger/dbi set up by TestInit
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=threading.currentThread().logger,
                                 dbinterface=threading.currentThread().dbi)
    return
def testlistDatasetFileDetails(self):
    """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
    TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
    self.dbs = DBSReader(self.endpoint)
    details = self.dbs.listDatasetFileDetails(DATASET)
    self.assertEqual(len(details), 49)
    self.assertTrue(TESTFILE in details)
    fileInfo = details[TESTFILE]
    # scalar metadata of the probe file
    self.assertEqual(fileInfo['NumberOfEvents'], 545)
    self.assertEqual(fileInfo['file_size'], 286021145)
    self.assertEqual(fileInfo['BlockName'],
                     '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
    # each checksum is exposed under two key spellings
    for key, value in [('Md5', 'NOTSET'), ('md5', 'NOTSET'),
                       ('Adler32', 'a41a1446'), ('adler32', 'a41a1446'),
                       ('Checksum', '22218315'), ('check_sum', '22218315')]:
        self.assertEqual(fileInfo[key], value)
    # the probe file covers lumis 1..111 of run 173658
    self.assertTrue(173658 in fileInfo['Lumis'])
    self.assertEqual(sorted(fileInfo['Lumis'][173658]), list(range(1, 112)))
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS2 Url, dataset name and list of blockNames,
    generate an XML structure for injection
    """
    spec = XMLInjectionSpec(dbsUrl)
    reader = DBSReader(dbsUrl, version="DBS_2_0_9")
    dataset = spec.getDataset(datasetPath)
    for blockName in blockNames:
        blockContent = reader.getFileBlock(blockName)
        openFlag = "y" if reader.blockIsOpen(blockName) else "n"
        xmlBlock = dataset.getFileblock(blockName, openFlag)
        # Any Checksum from DBS is type cksum
        for fileInfo in blockContent[blockName]['Files']:
            checksums = {'cksum': fileInfo['Checksum']}
            adler = fileInfo.get('Adler32')
            if adler not in (None, ''):
                checksums['adler32'] = adler
            xmlBlock.addFile(fileInfo['LogicalFileName'], checksums,
                             fileInfo['FileSize'])
    return spec.save()
def testGetDBSSummaryInfo(self):
    """getDBSSummaryInfo returns summary of dataset and block"""
    self.dbs = DBSReader(self.endpoint)
    # dataset-level summary
    dataset = self.dbs.getDBSSummaryInfo(DATASET)
    expectedDataset = {'path': DATASET, 'block': '',
                       'NumberOfEvents': 22075, 'NumberOfBlocks': 46,
                       'FileSize': 4001680824, 'file_size': 4001680824,
                       'NumberOfFiles': 49, 'NumberOfLumis': 7223}
    for key, value in expectedDataset.items():
        self.assertEqual(dataset[key], value)
    # block-level summary
    block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
    expectedBlock = {'path': '', 'block': BLOCK,
                     'NumberOfEvents': 377, 'NumberOfBlocks': 1,
                     'FileSize': 150780132, 'file_size': 150780132,
                     'NumberOfFiles': 2, 'NumberOfLumis': 94}
    for key, value in expectedBlock.items():
        self.assertEqual(block[key], value)
    # unknown dataset or block raises
    self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
    self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')
def makePhEDExDrop(dbsUrl, datasetPath, *blockNames):
    """
    _makePhEDExDrop_

    Given a DBS Url, dataset name and list of blockNames,
    generate an XML structure for injection
    """
    spec = XMLInjectionSpec(dbsUrl)
    reader = DBSReader(dbsUrl, version="DBS_2_0_9")
    dataset = spec.getDataset(datasetPath)
    for name in blockNames:
        content = reader.getFileBlock(name)
        if reader.blockIsOpen(name):
            xmlBlock = dataset.getFileblock(name, "y")
        else:
            xmlBlock = dataset.getFileblock(name, "n")
        # Any Checksum from DBS is type cksum
        for entry in content[name]['Files']:
            checksums = {'cksum': entry['Checksum']}
            if entry.get('Adler32') not in (None, ''):
                checksums['adler32'] = entry['Adler32']
            xmlBlock.addFile(entry['LogicalFileName'], checksums,
                             entry['FileSize'])
    xml = spec.save()
    return xml
def testlistDatasetFileDetails(self):
    """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
    TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
    # second entry presumably exercises the file-backed mock endpoint — TODO confirm
    endpoints = [self.endpoint,
                 'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:']
    for endpoint in endpoints:
        self.dbs = DBSReader(endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        fileInfo = details[TESTFILE]
        self.assertEqual(fileInfo['NumberOfEvents'], 545)
        self.assertEqual(fileInfo['Size'], 286021145)
        self.assertEqual(fileInfo['BlockName'],
                         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        self.assertEqual(fileInfo['Checksums'],
                         {'Checksum': '22218315', 'Adler32': 'a41a1446', 'Md5': 'NOTSET'})
        self.assertTrue(173658 in fileInfo['Lumis'])
        # expected lumis kept exactly as recorded from DBS (Python 2 longs)
        expectedLumis = sorted(
            map(
                long,
                [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37, \
                 25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55, \
                 57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54, \
                 63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,\
                 108, 98, 103, 109, 105]))
        self.assertEqual(sorted(fileInfo['Lumis'][173658]), expectedLumis)
def testLfnsInBlock(self):
    """lfnsInBlock returns lfns in block"""
    self.dbs = DBSReader(self.endpoint)
    lfns = [entry['logical_file_name'] for entry in self.dbs.lfnsInBlock(BLOCK)]
    self.assertTrue(FILE in lfns)
    # an unknown block raises rather than returning an empty list
    self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')
def getDBSPublicationInfo(self, outputDatasets):
    """
    What has been published
    Get the lumis and number of events in the published output datasets.
    """
    res = {'outputDatasets': {}}
    for outputDataset in outputDatasets:
        res['outputDatasets'][outputDataset] = {'lumis': {}, 'numEvents': 0}
        try:
            dbs = DBSReader("https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader",
                            cert=self.proxyfilename, key=self.proxyfilename)
            outputDatasetDetails = dbs.listDatasetFileDetails(outputDataset)
        except Exception as ex:
            # best-effort: log the failure and move on to the next dataset
            msg = "Failed to retrieve information from DBS for output dataset %s." % (outputDataset)
            msg += " Exception while contacting DBS: %s" % (str(ex))
            self.logger.exception(msg)
            continue
        # fold per-file lumis into a compact run/lumi map
        compact = LumiList(runsAndLumis=self.compactLumis(outputDatasetDetails)).getCompactList()
        res['outputDatasets'][outputDataset]['lumis'] = compact
        for outputFileDetails in outputDatasetDetails.values():
            res['outputDatasets'][outputDataset]['numEvents'] += outputFileDetails['NumberOfEvents']
    return res
def testListFileBlockLocation(self):
    """listFileBlockLocation returns block location"""
    WRONG_BLOCK = BLOCK[:-4] + 'abcd'
    BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
    DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
    DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                 'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
    self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
    # assume one site is cern
    cernSites = [x for x in self.dbs.listFileBlockLocation(BLOCK)
                 if x and x.find('cern.ch') > -1]
    self.assertTrue(cernSites)
    # This block is only found on DBS
    self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
    # doesn't raise on non-existant block
    self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
    # bulk calls: result length counts only resolvable blocks
    bulkCases = [([BLOCK, BLOCK2], 2),          # two blocks in phedex
                 ([BLOCK, WRONG_BLOCK], 1),     # one in phedex, one missing
                 ([BLOCK, DBS_BLOCK], 2),       # one in phedex, one in dbs
                 ([DBS_BLOCK, DBS_BLOCK2], 2),  # two in dbs
                 ([DBS_BLOCK, WRONG_BLOCK], 1)] # one in dbs, one missing
    for blockList, expected in bulkCases:
        self.assertEqual(expected, len(self.dbs.listFileBlockLocation(blockList)))
def testEmulator(self):
    """Toggling the emulators swaps which implementation each facade wraps."""
    EmulatorHelper.setEmulators(True, True, True, True)
    # emulated: facades wrap the WMQuality emulator classes
    self.assertEqual('WMQuality.Emulators.PhEDExClient.PhEDEx', PhEDEx().wrapped.__module__)
    self.assertEqual('WMQuality.Emulators.DBSClient.DBSReader', DBSReader(self.globalDBS).wrapped.__module__)
    self.assertEqual('WMQuality.Emulators.SiteDBClient.SiteDB', SiteDBJSON().wrapped.__module__)
    self.assertEqual('WMQuality.Emulators.RequestManagerClient.RequestManager', RequestManager().wrapped.__module__)
    # the facade class names are unchanged while emulated
    self.assertEqual('PhEDEx', PhEDEx().__class__.__name__)
    self.assertEqual('DBSReader', DBSReader(self.globalDBS).__class__.__name__)
    self.assertEqual('SiteDBJSON', SiteDBJSON().__class__.__name__)
    self.assertEqual('RequestManager', RequestManager().__class__.__name__)

    EmulatorHelper.resetEmulators()
    # after reset: facades wrap the real WMCore service implementations
    self.assertEqual('WMCore.Services.PhEDEx.PhEDEx', PhEDEx().wrapped.__module__)
    self.assertEqual('WMCore.Services.DBS.DBS2Reader', DBSReader(self.globalDBS).wrapped.__module__)
    self.assertEqual('WMCore.Services.SiteDB.SiteDB', SiteDBJSON().wrapped.__module__)
    self.assertEqual('WMCore.Services.RequestManager.RequestManager', RequestManager().wrapped.__module__)
    self.assertEqual('PhEDEx', PhEDEx().__class__.__name__)
    self.assertEqual('DBS2Reader', DBSReader(self.globalDBS).__class__.__name__)
    self.assertEqual('SiteDBJSON', SiteDBJSON().__class__.__name__)
    self.assertEqual('RequestManager', RequestManager().__class__.__name__)
def main():
    """Locate problematic workqueue elements and rewrite their Inputs to PSNs."""
    problemElems = getProblematicRequests()
    print("Found %d bad elements that needs fixup" % len(problemElems))
    if not problemElems:
        print("Nothing to fix, contact a developer if the problem persists...")
        return 0

    cric = CRIC()
    dbs = DBSReader("https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader")
    for elem in problemElems:
        print("Handling id: %s, with inputs: %s" % (elem.id, elem['Inputs']))
        for dataItem in elem['Inputs']:
            # datasets and blocks use different DBS location lookups
            if isDataset(dataItem):
                pnns = dbs.listDatasetLocation(dataItem, dbsOnly=True)
            else:
                pnns = dbs.listFileBlockLocation(dataItem, dbsOnly=True)
            psns = cric.PNNstoPSNs(pnns)
            print(" PNNs: %s map to PSNs: %s" % (pnns, psns))
            elem['Inputs'][dataItem] = psns

    backend.saveElements(*problemElems)
    print("Done")
    return 0
def testGetDBSSummaryInfo(self):
    """getDBSSummaryInfo returns summary of dataset and block"""
    self.dbs = DBSReader(self.endpoint)
    dataset = self.dbs.getDBSSummaryInfo(DATASET)
    # this API variant reports every counter as a string
    expectedDataset = {'path': DATASET, 'block': '',
                       'NumberOfEvents': '22075', 'NumberOfBlocks': '46',
                       'total_size': '4001680824', 'NumberOfFiles': '49',
                       'NumberOfLumis': '7223'}
    for key, value in expectedDataset.items():
        self.assertEqual(dataset[key], value)
    block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
    expectedBlock = {'path': '', 'block': BLOCK,
                     'NumberOfEvents': '377', 'NumberOfBlocks': '1',
                     'total_size': '150780132', 'NumberOfFiles': '2',
                     'NumberOfLumis': '94'}
    for key, value in expectedBlock.items():
        self.assertEqual(block[key], value)
    # unknown dataset or block raises
    self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
    self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')
def testGetFileBlocksInfo(self):
    """getFileBlocksInfo returns block info, including location lookup"""
    self.dbs = DBSReader(self.endpoint)
    blocks = self.dbs.getFileBlocksInfo(DATASET)
    blockList = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
    self.assertEqual(1, len(blockList))
    block = blockList[0]
    self.assertEqual(46, len(blocks))
    self.assertTrue(block['Name'] in [b['Name'] for b in blocks])
    self.assertEqual(BLOCK, block['Name'])
    #self.assertEqual(377, block['NumberOfEvents'])
    self.assertEqual(150780132, block['BlockSize'])
    self.assertEqual(2, block['NumberOfFiles'])
    # possibly fragile but assume block located at least at cern
    cernSites = [se['Name'] for se in block['StorageElementList']
                 if se['Name'].find('cern.ch') > -1]
    self.assertTrue(cernSites)
    # weird error handling - depends on whether block or dataset is missing
    self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
    self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))
def testlistRuns(self):
    """listRuns returns known runs"""
    self.dbs = DBSReader(self.endpoint)
    datasetRuns = self.dbs.listRuns(dataset=DATASET)
    self.assertEqual(46, len(datasetRuns))
    self.assertTrue(174074 in datasetRuns)
    # narrowing to one block leaves exactly one run
    blockRuns = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
    self.assertEqual([173657], blockRuns)
def testListFilesInBlock(self):
    """listFilesInBlock returns files in block"""
    self.dbs = DBSReader(self.endpoint)
    lfns = [entry['LogicalFileName'] for entry in self.dbs.listFilesInBlock(BLOCK)]
    self.assertTrue(FILE in lfns)
    # an unknown block raises
    self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')
def testListProcessedDatasets(self):
    """listProcessedDatasets returns known processed datasets"""
    self.dbs = DBSReader(self.endpoint)
    datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
    for known in ('Run2011A-v1', 'Run2011B-v1'):
        self.assertTrue(known in datasets)
    # unknown tier or primary dataset yields an empty result
    self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
    self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))
def importDatasetWithExistingParents(self, sourceDBS, sourceDatasetPath, targetDBS,
                                     onlyClosed=True):
    """
    _importDataset_

    Import a dataset into the local scope DBS.
    It complains if the parent dataset ar not there!!

    - *sourceDBS* : URL for input DBS instance
    - *sourceDatasetPath* : Dataset Path to be imported
    - *targetDBS* : URL for DBS to have dataset imported to

    NOTE(review): this uses Python 2-only syntax (`except DbsException, ex`);
    the module cannot run under Python 3 as written.
    """
    reader = DBSReader(sourceDBS)
    inputBlocks = reader.getFileBlocksInfo(sourceDatasetPath, onlyClosed)
    for inputBlock in inputBlocks:
        block = inputBlock["Name"]
        #  //
        # // Test block does not exist in target
        #//
        if self.reader.blockExists(block):
            #  //
            # // block exists
            #//  If block is closed dont attempt transfer
            # NOTE(review): `not x != "1"` is `x == "1"`; whether "1" means
            # open or closed here is not verifiable from this code — the
            # warning text says "closed".  Confirm against DBS semantics.
            if not str(inputBlock["OpenForWriting"]) != "1":
                msg = "Block already exists in target DBS and is closed:\n"
                msg += " ==> %s\n" % block
                msg += "Skipping Import of that block"
                logging.warning(msg)
                locations = reader.listFileBlockLocation(block)
                # only empty file blocks can have no location
                if not locations and str(inputBlock["NumberOfFiles"]) != "0":
                    msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
                    msg += "Block has no locations defined: %s" % block
                    raise DBSWriterError(msg)
                # refresh replica info on the target, then skip the import
                logging.info("Update block locations to:")
                for sename in locations:
                    self.dbs.addReplicaToBlock(block, sename)
                    logging.info(sename)
                continue

        # block not present (or still open): copy its contents across
        try:
            xferData = reader.dbs.listDatasetContents(sourceDatasetPath, block)
        except DbsException, ex:
            msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
            msg += "Could not read content of dataset:\n ==> %s\n" % (sourceDatasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
        try:
            self.dbs.insertDatasetContents(xferData)
        except DbsException, ex:
            msg = "Error in DBSWriter.importDatasetWithExistingParents\n"
            msg += "Could not write content of dataset:\n ==> %s\n" % (sourceDatasetPath,)
            msg += "Block name:\n ==> %s\n" % block
            msg += "%s\n" % formatEx(ex)
            raise DBSWriterError(msg)
def testGetFileBlock(self):
    """getFileBlock returns block"""
    self.dbs = DBSReader(self.endpoint)
    result = self.dbs.getFileBlock(BLOCK)
    self.assertEqual(len(result), 1)
    # result is keyed by block name; our block holds two files
    self.assertEqual(2, len(result[BLOCK]['Files']))
    self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')
def testListFileBlocks(self):
    """listFileBlocks returns block names in dataset"""
    self.dbs = DBSReader(self.endpoint)
    blocks = self.dbs.listFileBlocks(DATASET)
    self.assertTrue(BLOCK in blocks)
    # block is closed
    closedBlocks = self.dbs.listFileBlocks(DATASET, blockName=BLOCK,
                                           onlyClosedBlocks=True)
    self.assertEqual(closedBlocks[0], BLOCK)
    self.assertTrue(BLOCK in closedBlocks[0])
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents(BLOCK_WITH_PARENTS)
    self.assertEqual(1, len(parents))
    firstParent = parents[0]
    self.assertEqual(PARENT_BLOCK, firstParent['Name'])
    self.assertTrue(firstParent['PhEDExNodeList'])
    # the parent block itself has no parents
    self.assertFalse(self.dbs.listBlockParents(PARENT_BLOCK))
def testlistRunLumis(self):
    """listRunLumis returns known runs and lumicounts"""
    self.dbs = DBSReader(self.endpoint)
    runs = self.dbs.listRunLumis(dataset=DATASET)
    self.assertEqual(46, len(runs))
    self.assertTrue(173692 in runs)
    self.assertEqual(runs[173692], 2782)
    # Bug fix: this previously called listRuns, which returns a plain list
    # of run numbers and can never equal the {run: lumi-count} dict below
    # (cf. testlistRuns, which asserts listRuns returns [173657]).
    runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
    self.assertEqual({173657: 94}, runs)
def testMatchProcessedDatasets(self):
    """
    matchProcessedDatasets returns known processed datasets
    """
    self.dbs = DBSReader(self.endpoint)
    matches = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
    self.assertEqual(1, len(matches))
    self.assertEqual(['/Jet/Run2011A-v1/RAW'], matches[0]['PathList'])
    self.assertEqual('Run2011A-v1', matches[0]['Name'])
    # an unknown processed-dataset name matches nothing
    self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    childBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
    parentBlock = '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents(childBlock)
    self.assertEqual(1, len(parents))
    self.assertEqual(parentBlock, parents[0]['Name'])
    # assume at least one storage element is at cern
    cernSites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
    self.assertTrue(cernSites)
    # the RAW block itself has no parents
    self.assertFalse(self.dbs.listBlockParents(parentBlock))
def testGetFileBlockWithParents(self):
    """getFileBlockWithParents returns block and parents"""
    recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
    self.dbs = DBSReader(self.endpoint)
    result = self.dbs.getFileBlockWithParents(recoBlock)
    self.assertEqual(len(result), 1)
    parentLfn = result[recoBlock]['Files'][0]['ParentList'][0]['LogicalFileName']
    self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                     parentLfn)
    # an unknown block raises
    self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')
def createFilesetFromDBS(self, collection, filesetName, dbsURL, dataset, mask=None):
    """
    _createFilesetFromDBS_

    Get info from DBS, apply mask (filter) and create a fileset.

    :param collection: collection the new fileset is attached to
    :param filesetName: name of the CouchFileset to create
    :param dbsURL: DBS (DBS2) endpoint queried for the dataset's files
    :param dataset: dataset path whose files populate the fileset
    :param mask: optional mask applied when adding files to the fileset
    :returns: (fileSet, fileList) — the fileset and the result of adding files
    """
    fileSet = CouchFileset(database=self.database, url=self.url, name=filesetName)
    fileSet.setCollection(collection)

    files = []
    # cache block -> locations so each block is looked up only once
    blockLocations = {}

    dbsReader = DBSReader(dbsURL, version="DBS_2_0_9", mode="GET")
    dbsResults = dbsReader.dbs.listFiles(
        path=dataset, retriveList=["retrive_lumi", "retrive_run"])
    logging.info('Found %s files from DBS', len(dbsResults))

    for dbsResult in dbsResults:
        blockName = dbsResult["Block"]["Name"]
        if blockName not in blockLocations:
            blockLocations[blockName] = dbsReader.listFileBlockLocation(blockName)
        # renamed from 'file', which shadowed the builtin
        dbsFile = File(lfn=dbsResult["LogicalFileName"],
                       size=dbsResult["FileSize"],
                       merged=True,
                       events=dbsResult["NumberOfEvents"],
                       locations=blockLocations[blockName])
        # group this file's lumis by run before attaching Run objects
        runs = {}
        for lumi in dbsResult["LumiList"]:
            runNumber = lumi['RunNumber']
            runString = str(runNumber)
            lumiNumber = lumi["LumiSectionNumber"]
            if runString in runs:
                runs[runString].lumis.append(lumiNumber)
            else:
                runs[runString] = Run(runNumber, lumiNumber)
        for run in runs.values():
            dbsFile.addRun(run)
        files.append(dbsFile)

    logging.info('Uploading %s files in fileset', len(files))
    fileList = fileSet.add(files, mask)

    return fileSet, fileList
def testGetFileBlockWithParents(self):
    """getFileBlockWithParents returns block and parents"""
    self.dbs = DBSReader(self.endpoint)
    result = self.dbs.getFileBlockWithParents(BLOCK_WITH_PARENTS)
    self.assertEqual(len(result), 1)
    firstFile = result[BLOCK_WITH_PARENTS]['Files'][0]
    self.assertEqual(PARENT_FILE, firstFile['ParentList'][0]['LogicalFileName'])
    # an unknown block raises
    self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')
def testListFilesInBlockWithParents(self):
    """listFilesInBlockWithParents gets files with parents for a block"""
    recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
    self.dbs = DBSReader(self.endpoint)
    files = self.dbs.listFilesInBlockWithParents(recoBlock)
    self.assertEqual(4, len(files))
    # block name is reported under both key spellings
    self.assertEqual(recoBlock, files[0]['block_name'])
    self.assertEqual(recoBlock, files[0]['BlockName'])
    self.assertEqual('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
                     files[0]['ParentList'][0]['LogicalFileName'])
    # an unknown block raises
    self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')
def testListPrimaryDatasets(self):
    """
    listPrimaryDatasets returns known primary datasets
    """
    self.dbs = DBSReader(self.endpoint)
    results = self.dbs.listPrimaryDatasets('Jet*')
    for expected in ('Jet', 'JetMET', 'JetMETTau'):
        self.assertTrue(expected in results)
    # a non-matching pattern yields an empty result
    self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
    return
def testlistRunLumis(self):
    """listRunLumis returns known runs and lumicounts (None for DBS3)"""
    self.dbs = DBSReader(self.endpoint)
    datasetRuns = self.dbs.listRunLumis(dataset=DATASET)
    self.assertEqual(46, len(datasetRuns))
    self.assertTrue(173692 in datasetRuns)
    # DBS3 reports no per-run lumi counts, hence None
    self.assertEqual(datasetRuns[173692], None)
    blockRuns = self.dbs.listRunLumis(block=BLOCK)
    self.assertEqual(1, len(blockRuns))
    self.assertTrue(173657 in blockRuns)
    self.assertEqual(blockRuns[173657], None)
def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
    """
    pileupDict is a Python dictionary containing particular pileup
    configuration information. Query DBS on given dataset contained
    now in both input defaultArguments as well as in the pileupDict
    and compare values.

    NOTE(review): this module is Python 2 only (bare `print` statements
    below); it cannot run under Python 3 as written.
    """
    args = {}
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    reader = DBSReader(dbsUrl, **args)
    inputArgs = defaultArguments["PileupConfig"]
    # the fetched configuration must cover exactly the requested pileup types
    self.assertEqual(len(inputArgs), len(pileupDict),
                     "Number of pileup types different.")
    for pileupType in inputArgs:
        m = ("pileup type '%s' not in PileupFetcher-produced pileup "
             "configuration: '%s'" % (pileupType, pileupDict))
        self.assertTrue(pileupType in pileupDict, m)
    # now query DBS for compare actual results on files lists for each
    # pileup type and dataset and location (storage element names)
    # pileupDict is saved in the file and now comparing items of this
    # configuration with actual DBS results, the structure of pileupDict:
    # {"pileupTypeA": {"BlockA": {"FileList": [], "StorageElementNames": []},
    #                  "BlockB": {"FileList": [], "StorageElementName": []}, ....}
    for pileupType, datasets in inputArgs.items():
        # this is from the pileup configuration produced by PileupFetcher
        blockDict = pileupDict[pileupType]
        for dataset in datasets:
            dbsFileBlocks = reader.listFileBlocks(dataset = dataset)
            for dbsFileBlockName in dbsFileBlocks:
                fileList = []  # list of files in the block (dbsFile["LogicalFileName"])
                storageElemNames = set()  # list of StorageElementName
                # each DBS block has a list under 'StorageElementList', iterate over
                storageElements = reader.listFileBlockLocation(dbsFileBlockName)
                for storElem in storageElements:
                    storageElemNames.add(storElem)
                # now get list of files in the block
                dbsFiles = reader.listFilesInBlock(dbsFileBlockName)
                for dbsFile in dbsFiles:
                    fileList.append(dbsFile["LogicalFileName"])
                # now compare the sets:
                m = ("StorageElementNames don't agree for pileup type '%s', "
                     "dataset '%s' in configuration: '%s'" % (pileupType, dataset, pileupDict))
                self.assertEqual(set(blockDict[dbsFileBlockName]["StorageElementNames"]),
                                 storageElemNames, m)
                m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                     " in configuration: '%s'" % (pileupType, dataset, pileupDict))
                # debugging aid left by the original author (Python 2 print)
                print fileList
                print blockDict[dbsFileBlockName]["FileList"]
                self.assertEqual(sorted(blockDict[dbsFileBlockName]["FileList"]),
                                 sorted(fileList))
def testListDatatiers(self):
    """
    listDatatiers returns all datatiers available
    """
    self.dbs = DBSReader(self.endpoint)
    results = self.dbs.listDatatiers()
    for tier in ('RAW', 'GEN-SIM-RECO', 'GEN-SIM'):
        self.assertTrue(tier in results)
    # a made-up tier must not appear
    self.assertFalse('RAW-ALAN' in results)
    return
def testListFileBlockLocation(self):
    """listFileBlockLocation returns block location"""
    self.dbs = DBSReader(self.endpoint)
    # assume one site is cern
    cernSites = [site for site in self.dbs.listFileBlockLocation(BLOCK)
                 if site.find('cern.ch') > -1]
    self.assertTrue(cernSites)
    # doesn't raise on non-existant block
    self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))
def validateDatatier(self, datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    knownTiers = DBSReader(dbsUrl).listDatatiers()
    badTiers = list(set(datatier) - set(knownTiers))
    if badTiers:
        # reject the request outright when any tier is unknown to DBS
        raise cherrypy.HTTPError(400, "Bad datatier(s): %s not available in DBS." % badTiers)
def validateDatatier(self, datatier, dbsUrl):
    """
    _validateDatatier_

    Provided a list of datatiers extracted from the outputDatasets, checks
    whether they all exist in DBS already.
    """
    availableTiers = set(DBSReader(dbsUrl).listDatatiers())
    badTiers = list(set(datatier) - availableTiers)
    if badTiers:
        # any unknown tier means the whole request is rejected
        raise cherrypy.HTTPError(
            400, "Bad datatier(s): %s not available in DBS." % badTiers)
def testGetFileBlockWithParents(self):
    """getFileBlockWithParents returns block and parents"""
    jetBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
    self.dbs = DBSReader(self.endpoint)
    result = self.dbs.getFileBlockWithParents(jetBlock)
    self.assertEqual(len(result), 1)
    parentLfn = result[jetBlock]['Files'][0]['ParentList'][0]['LogicalFileName']
    self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                     parentLfn)
    # an unknown block raises
    self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')
def testListFileBlockLocation(self):
    """listFileBlockLocation returns block location"""
    WRONG_BLOCK = BLOCK[:-4] + 'abcd'
    BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
    DBS_BLOCK = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e'
    DBS_BLOCK2 = '/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'+\
                 'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab'
    self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
    # assume one site is cern
    cernNodes = [x for x in self.dbs.listFileBlockLocation(BLOCK)
                 if x and x.find('CH_CERN') > -1]
    self.assertTrue(cernNodes)
    # This block is only found on DBS
    self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
    # doesn't raise on non-existant block
    self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
    # bulk calls: only resolvable blocks contribute to the result
    bulkCases = [([BLOCK, BLOCK2], 2),          # two blocks in phedex
                 ([BLOCK, WRONG_BLOCK], 1),     # one in phedex, one missing
                 ([BLOCK, DBS_BLOCK], 2),       # one in phedex, one in dbs
                 ([DBS_BLOCK, DBS_BLOCK2], 2),  # two in dbs
                 ([DBS_BLOCK, WRONG_BLOCK], 1)] # one in dbs, one missing
    for blockList, expected in bulkCases:
        self.assertEqual(expected, len(self.dbs.listFileBlockLocation(blockList)))
def setUp(self):
    """
    _setUp_

    Build the full test fixture: Couch databases, the WMBS/DBSBuffer/BossAir/
    ResourceControl schema, a scratch work directory, and a test workload
    spec plus the DB access helpers.  Steps are order-dependent (logging
    before DB, DB before schema).
    """
    super(WMBSHelperTest, self).setUp()
    self.testInit = TestInitCouchApp(__file__)
    self.testInit.setLogging()
    # destroyAllDatabase=True: start each test from a clean relational DB
    self.testInit.setDatabaseConnection(destroyAllDatabase=True)
    # Couch apps used by the job/fwjr dump and the config cache
    self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
    self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
    self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
    os.environ["COUCHDB"] = "wmbshelper_t"
    self.testInit.setSchema(customModules = ["WMCore.WMBS",
                                             "WMComponent.DBS3Buffer",
                                             "WMCore.BossAir",
                                             "WMCore.ResourceControl"],
                            useDefault = False)
    self.workDir = self.testInit.generateWorkDir()

    # workload spec and the dataset/DBS handles derived from its top task
    self.wmspec = self.createWMSpec()
    self.topLevelTask = getFirstTask(self.wmspec)
    self.inputDataset = self.topLevelTask.inputDataset()
    self.dataset = self.topLevelTask.getInputDatasetPath()
    self.dbs = DBSReader(self.inputDataset.dbsurl)
    # DAO factory bound to the thread-local logger/dbi set up by TestInit
    self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                 logger = threading.currentThread().logger,
                                 dbinterface = threading.currentThread().dbi)
    return
def createFilesetFromDBS(self, collection, filesetName, dbsURL, dataset, mask=None):
    """
    _createFilesetFromDBS_

    Get info from DBS, apply mask (filter) and create a fileset.

    :param collection: collection the new fileset is attached to
    :param filesetName: name of the CouchFileset to create
    :param dbsURL: DBS (DBS2) endpoint queried for the dataset's files
    :param dataset: dataset path whose files populate the fileset
    :param mask: optional mask applied when adding files to the fileset
    :returns: (fileSet, fileList) — the fileset and the result of adding files
    """
    fileSet = CouchFileset(database=self.database, url=self.url, name=filesetName)
    fileSet.setCollection(collection)

    files = []
    # cache block -> locations so each block is looked up only once
    blockLocations = {}

    dbsReader = DBSReader(dbsURL, version="DBS_2_0_9", mode="GET")
    dbsResults = dbsReader.dbs.listFiles(path=dataset,
                                         retriveList=["retrive_lumi", "retrive_run"])
    logging.info("Found %s files from DBS", len(dbsResults))

    for dbsResult in dbsResults:
        blockName = dbsResult["Block"]["Name"]
        if blockName not in blockLocations:
            blockLocations[blockName] = dbsReader.listFileBlockLocation(blockName)
        # renamed from 'file', which shadowed the builtin
        dbsFile = File(
            lfn=dbsResult["LogicalFileName"],
            size=dbsResult["FileSize"],
            merged=True,
            events=dbsResult["NumberOfEvents"],
            locations=blockLocations[blockName],
        )
        # group this file's lumis by run before attaching Run objects
        runs = {}
        for lumi in dbsResult["LumiList"]:
            runNumber = lumi["RunNumber"]
            runString = str(runNumber)
            lumiNumber = lumi["LumiSectionNumber"]
            if runString in runs:
                runs[runString].lumis.append(lumiNumber)
            else:
                runs[runString] = Run(runNumber, lumiNumber)
        for run in runs.values():
            dbsFile.addRun(run)
        files.append(dbsFile)

    logging.info("Uploading %s files in fileset", len(files))
    fileList = fileSet.add(files, mask)

    return fileSet, fileList
def testlistRuns(self):
    """listRuns returns known runs"""
    self.dbs = DBSReader(self.endpoint)
    allRuns = self.dbs.listRuns(dataset = DATASET)
    self.assertEqual(46, len(allRuns))
    self.assertTrue(174074 in allRuns)
    # restricting to one block leaves exactly one run
    self.assertEqual([173657], self.dbs.listRuns(dataset = DATASET, block = BLOCK))
def testListFileBlocks(self):
    """listFileBlocks returns block names in dataset"""
    self.dbs = DBSReader(self.endpoint)
    blocks = self.dbs.listFileBlocks(DATASET)
    # Previously 'blocks' was fetched but never checked; assert membership
    # exactly as the sibling version of this test does.
    self.assertTrue(BLOCK in blocks)
    # block is closed
    block = self.dbs.listFileBlocks(DATASET, blockName = BLOCK,
                                    onlyClosedBlocks = True)[0]
    self.assertEqual(block, BLOCK)
    self.assertTrue(BLOCK in block)
def testListProcessedDatasets(self):
    """listProcessedDatasets returns known processed datasets"""
    self.dbs = DBSReader(self.endpoint)
    datasets = self.dbs.listProcessedDatasets("Jet", "RAW")
    for era in ("Run2011A-v1", "Run2011B-v1"):
        self.assertTrue(era in datasets)
    # unknown tier or primary dataset yields nothing
    self.assertFalse(self.dbs.listProcessedDatasets("Jet", "blah"))
    self.assertFalse(self.dbs.listProcessedDatasets("blah", "RAW"))
def testListFileBlockLocation(self):
    """listFileBlockLocation returns block location"""
    self.dbs = DBSReader(self.endpoint)
    # assume one site is cern; `in` + any() replaces the dated
    # `x.find('cern.ch') > -1` list comprehension
    self.assertTrue(any('cern.ch' in x
                        for x in self.dbs.listFileBlockLocation(BLOCK)))
    # doesn't raise on non-existent block
    self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))
def testlistRunLumis(self):
    """listRunLumis returns known runs and lumicounts"""
    self.dbs = DBSReader(self.endpoint)
    runs = self.dbs.listRunLumis(dataset=DATASET)
    self.assertEqual(46, len(runs))
    self.assertIn(173692, runs)
    self.assertEqual(runs[173692], 2782)
    # Fix: original called listRuns here (copy-paste from testlistRuns),
    # which returns a list and can never equal the expected
    # {run: lumicount} dict — the block-level branch was untested.
    runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
    self.assertEqual({173657: 94}, runs)
def testGetFileBlockWithParents(self):
    """getFileBlockWithParents returns block and parents"""
    recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
    parentLfn = '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root'
    self.dbs = DBSReader(self.endpoint)
    # Exactly one entry, keyed by the block name we asked for
    result = self.dbs.getFileBlockWithParents(recoBlock)
    self.assertEqual(len(result), 1)
    # First file's first parent is the known RAW LFN
    blockInfo = result[recoBlock]
    firstParent = blockInfo['Files'][0]['ParentList'][0]
    self.assertEqual(parentLfn, firstParent['LogicalFileName'])
    # A mangled block name raises rather than returning empty
    self.assertRaises(DBSReaderError,
                      self.dbs.getFileBlockWithParents, BLOCK + 'asas')
def setUp(self):
    """
    _setUp_

    Initialize the DBSReader API to point at the production DBS server.
    """
    # Fix: docstring previously claimed this set up the PhEDEx API;
    # it builds a DBSReader. Dead commented-out DBS2 endpoint removed.
    # Store the endpoint so tests can reference self.endpoint,
    # consistent with the sibling test methods.
    self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    self.dbs = DBSReader(self.endpoint)
    return
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    # NOTE(review): a second method with this exact name exists later in
    # the file (DBS2/StorageElementList variant); Python keeps only the
    # last definition, so one of the two never runs — confirm intent.
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents('/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0')
    self.assertEqual(1, len(parents))
    self.assertEqual('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0',
                     parents[0]['Name'])
    # `in` + any() replaces the dated `x.find("CH_CERN") > -1` comprehension
    self.assertTrue(any("CH_CERN" in x for x in parents[0]['PhEDExNodeList']))
    # A RAW block has no parents, so the result is empty (falsy)
    self.assertFalse(self.dbs.listBlockParents('/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'))
def testMatchProcessedDatasets(self):
    """
    matchProcessedDatasets returns known processed datasets
    """
    self.dbs = DBSReader(self.endpoint)
    matches = self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v1")
    # A single match is expected for the known processed-dataset name
    self.assertEqual(1, len(matches))
    match = matches[0]
    self.assertEqual(["/Jet/Run2011A-v1/RAW"], match["PathList"])
    self.assertEqual("Run2011A-v1", match["Name"])
    # A processed-dataset name that does not exist matches nothing
    noMatch = self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v666")
    self.assertFalse(noMatch)
def testListFilesInBlockWithParents(self):
    """listFilesInBlockWithParents gets files with parents for a block"""
    # hope PromptReco doesn't get deleted
    recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
    parentLfn = '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root'
    self.dbs = DBSReader(self.endpoint)
    blockFiles = self.dbs.listFilesInBlockWithParents(recoBlock)
    # Exactly one file in the block, tagged with the block's own name
    self.assertEqual(1, len(blockFiles))
    firstFile = blockFiles[0]
    self.assertEqual(recoBlock, firstFile['Block']['Name'])
    # Its first parent is the known RAW LFN
    self.assertEqual(parentLfn,
                     firstFile['ParentList'][0]['LogicalFileName'])
    # A mangled block name raises rather than returning empty
    self.assertRaises(DBSReaderError,
                      self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')
def testListBlockParents(self):
    """listBlockParents returns block parents"""
    recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
    rawBlock = '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60'
    self.dbs = DBSReader(self.endpoint)
    parents = self.dbs.listBlockParents(recoBlock)
    # The RECO block has exactly one parent: the known RAW block
    self.assertEqual(1, len(parents))
    self.assertEqual(rawBlock, parents[0]['Name'])
    # assume one storage element is at CERN
    sites = [se for se in parents[0]['StorageElementList']
             if se.find("cern.ch") > -1]
    self.assertTrue(sites)
    # A mangled block name produces an empty (falsy) parent list
    self.assertFalse(self.dbs.listBlockParents(recoBlock + 'dsl'))