class DBSReaderTest(unittest.TestCase):
    """
    Integration tests for DBSReader.

    Every test builds a fresh DBSReader against a live DBS endpoint and
    compares the answers with values recorded for the reference DATASET,
    BLOCK and FILE (module-level constants expected to be defined elsewhere
    in this file).  All tests are tagged "integration" because they need
    network access to the DBS service.
    """

    # Block (and the LFN of its first file's parent) used by the
    # parentage-related tests.
    PROMPT_RECO_BLOCK = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
    PROMPT_RECO_PARENT_LFN = \
        '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root'

    def setUp(self):
        """
        _setUp_

        Record the DBS endpoint used by the tests.  The reader itself is
        created by each test, so only the URL is stored here.
        """
        # Previous endpoint, kept for reference:
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        # An unknown pattern must give an empty (falsy) result.
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(
            self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        # The expected value is a run -> lumi-count mapping, so this must be
        # listRunLumis (listRuns returns a plain list of runs, see testlistRuns).
        runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        # Plain ints compare equal to Python 2 longs, so no long() conversion
        # is needed (and the test no longer depends on a Python 2-only builtin).
        expectedLumis = sorted([
            8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,
            25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55,
            57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,
            63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,
            108, 98, 103, 109, 105])
        # NOTE(review): the second entry looks like a file path rather than a
        # DBS URL (possibly left behind by a bad search/replace) - confirm the
        # intended endpoint.
        for endpoint in [self.endpoint,
                         'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:']:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(
                details[TESTFILE]['BlockName'],
                '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
            self.assertEqual(details[TESTFILE]['Checksums'],
                             {'Checksum': '22218315',
                              'Adler32': 'a41a1446',
                              'Md5': 'NOTSET'})
            self.assertTrue(173658 in details[TESTFILE]['Lumis'])
            self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]),
                             expectedLumis)

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo,
                          DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList']
                 if 'cern.ch' in x['Name']]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo,
                          DATASET + 'blah')
        self.assertFalse(
            self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # The reference block must show up in the full dataset listing.
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK,
                                        onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)]
        self.assertTrue(FILE in lfns)
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock,
                          DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(self.PROMPT_RECO_BLOCK)
        self.assertEqual(1, len(files))
        self.assertEqual(self.PROMPT_RECO_BLOCK, files[0]['Block']['Name'])
        self.assertEqual(self.PROMPT_RECO_PARENT_LFN,
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError,
                          self.dbs.listFilesInBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                     'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e')
        DBS_BLOCK2 = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                      'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab')
        # This test talks to the phys03 instance rather than self.endpoint.
        self.dbs = DBSReader(
            'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK)
                 if x and 'cern.ch' in x]
        self.assertTrue(sites)
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existent block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))

        # test bulk call:
        # two blocks in phedex
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        # one block in phedex one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        # one in phedex one in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        # two in dbs
        self.assertEqual(
            2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        # one in DBS and one does not exist
        self.assertEqual(
            1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(self.PROMPT_RECO_BLOCK)
        self.assertEqual(len(block), 1)
        block = block[self.PROMPT_RECO_BLOCK]
        self.assertEqual(
            self.PROMPT_RECO_PARENT_LFN,
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents,
                          BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(self.PROMPT_RECO_BLOCK)
        self.assertEqual(1, len(parents))
        self.assertEqual(
            '/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
            parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList']
                 if "cern.ch" in x]
        self.assertTrue(sites)

        # A block name that does not exist yields no parents.
        self.assertFalse(
            self.dbs.listBlockParents(self.PROMPT_RECO_BLOCK + 'dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))


class WMBSHelperTest(EmulatedUnitTestCase): def setUp(self): """ _setUp_ """ super(WMBSHelperTest, self).setUp() self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection(destroyAllDatabase=True) self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=["WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = DBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) self.configFile = EmulatorSetup.setupWMAgentConfig() self.config = loadConfigurationFile(self.configFile) self.config.component_("JobSubmitter") self.config.JobSubmitter.submitDir = self.workDir self.config.JobSubmitter.submitScript = os.path.join(getTestBase(), 'WMComponent_t/JobSubmitter_t', 'submit.sh') return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() EmulatorSetup.deleteConfig(self.configFile) super(WMBSHelperTest, self).tearDown() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. 
""" myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) dummyLocationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName='site1', taskType='Processing', \ maxSlots=10000, pendingSlots=10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") 
self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] 
= "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = 
self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. 
""" testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA", dataTier='DataTierA') mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA", dataTier="DataTierA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") 
skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged", dataTier="DataTierA") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierB", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = SiteDBJSON() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.pnns = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, pnn=self.siteDB.cmsNametoPhEDExNode(site)[0]) self.pnns.append(self.siteDB.cmsNametoPhEDExNode(site)[0]) def createWMSpec(self, name='ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites=[], nonCustodialSites=[], autoApproveSites=[], priority="Low", custodialSubType="Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): mcArgs['CouchDBName'] = rerecoArgs["CouchDBName"] mcArgs["ConfigCacheID"] = createConfig(mcArgs["CouchDBName"]) wmspec = 
monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = DBSReader(inputDataset.dbsurl) # dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ baAPI = BossAirAPI(config=self.config, insertStates=True) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", self.config, self.config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. 
""" resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputADataTierA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputADataTierA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-MergedDataTierA", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputADataTierA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), 
"Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["MergedDataTierA"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-MergedDataTierA", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputADataTierA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputADataTierA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputBDataTierB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputBDataTierB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputADataTierA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, 
"/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputADataTierA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputBDataTierB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputBDataTierB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = 
ResourceControl() resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS(task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["MergedDataTierA"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-MergedDataTierA", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, 
skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputADataTierA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputADataTierA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputBDataTierB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputBDataTierB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputADataTierA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputADataTierA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputBDataTierB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputBDataTierB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", 
"Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunBlacklist([181183]) # Set run blacklist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunWhitelist([181183]) # Set run whitelist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['181367'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['57,80'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['123454321'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['123,123'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, 
block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#" + BLOCK1 wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) # Not clear what's supposed to happen here, 2nd test is completely redundant dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. # Not clear what's supposed to happen here, 2nd test is completely redundant numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. 
When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ # Swap out the dataset for one that has parents task = next(self.wmspec.taskIterator()) oldDS = task.inputDataset() # Copy the old dataset, only will use DBS URL from it task.addInputDataset(name="/Cosmics/ComissioningHI-PromptReco-v1/RECO", primary='Cosmics', processed='ComissioningHI-PromptReco-v1', tier='RECO', dbsurl=oldDS.dbsurl) block = '/Cosmics/ComissioningHI-PromptReco-v1/RECO' + '#5b89ba9c-0dbf-11e1-9b6c-003048caaace' # File creation without parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=False, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 0) # no parents per child # File creation with parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 1) # one parent per child def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" # This test is failing because the name of the couch DB is set to None # in TestMonteCarloWorkloadFactory.getMCArgs() but changing it to # "reqmgr_config_cache_t" from StdBase test arguments does not fix the # situation. 
testDuplicateSubscription probably has the same issue self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) firstFile = list(fileset.files)[0] self.assertEqual(firstFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(firstFile['merged'], False) # merged files get added to dbs self.assertEqual(len(firstFile['parents']), 0) # firstFile.loadData() self.assertEqual(sorted(firstFile['locations']), sorted(self.pnns)) self.assertEqual(len(firstFile.getParentLFNs()), 0) self.assertEqual(len(firstFile.getRuns()), 1) run = firstFile.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class DBSReaderTest(unittest.TestCase):
    """Checks DBSReader query results against fixed reference values for DATASET/BLOCK/FILE."""

    def setUp(self):
        """
        _setUp_

        Remember the DBS endpoint URL; every test creates its own DBSReader from it.
        """
        # Older endpoint, kept for reference:
        # "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.dbs = None
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        tiers = self.dbs.listDatatiers()
        for knownTier in ('RAW', 'GEN-SIM-RECO', 'GEN-SIM'):
            self.assertTrue(knownTier in tiers)
        self.assertFalse('RAW-ALAN' in tiers)

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        primaries = self.dbs.listPrimaryDatasets('Jet*')
        for knownPrimary in ('Jet', 'JetMET', 'JetMETTau'):
            self.assertTrue(knownPrimary in primaries)
        # a pattern matching nothing yields a falsy result
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        matches = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(matches))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], matches[0]['PathList'])
        self.assertEqual('Run2011A-v1', matches[0]['Name'])
        unknown = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666')
        self.assertFalse(unknown)

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        datasetRuns = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(datasetRuns))
        self.assertTrue(174074 in datasetRuns)
        blockRuns = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(blockRuns))
        self.assertEqual([173657], blockRuns)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)
        datasetLumis = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(datasetLumis))
        self.assertTrue(173692 in datasetLumis)
        self.assertEqual(datasetLumis[173692], None)
        blockLumis = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(blockLumis))
        self.assertTrue(173657 in blockLumis)
        self.assertEqual(blockLumis[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        processed = self.dbs.listProcessedDatasets('Jet', 'RAW')
        for knownName in ('Run2011A-v1', 'Run2011B-v1'):
            self.assertTrue(knownName in processed)
        # an unknown tier or an unknown primary dataset gives a falsy result
        for primary, tier in (('Jet', 'blah'), ('blah', 'RAW')):
            self.assertFalse(self.dbs.listProcessedDatasets(primary, tier))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        lfns = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(lfns))
        self.assertTrue(FILE in lfns)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertTrue(TESTFILE in details)
        self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
        self.assertEqual(details[TESTFILE]['file_size'], 286021145)
        self.assertEqual(details[TESTFILE]['BlockName'],
                         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        # every checksum is checked under two key spellings
        checksumExpectations = (
            ('Md5', 'NOTSET'),
            ('md5', 'NOTSET'),
            ('Adler32', 'a41a1446'),
            ('adler32', 'a41a1446'),
            ('Checksum', '22218315'),
            ('check_sum', '22218315'),
        )
        for key, expected in checksumExpectations:
            self.assertEqual(details[TESTFILE][key], expected)
        self.assertTrue(173658 in details[TESTFILE]['Lumis'])
        # the reference lumi list is the contiguous range 1..111
        self.assertEqual(sorted(details[TESTFILE]['Lumis'][173658]), list(range(1, 112)))

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)

        datasetSummary = self.dbs.getDBSSummaryInfo(DATASET)
        datasetExpectations = (
            ('path', DATASET),
            ('block', ''),
            ('NumberOfEvents', 22075),
            ('NumberOfBlocks', 46),
            ('FileSize', 4001680824),
            ('file_size', 4001680824),
            ('NumberOfFiles', 49),
            ('NumberOfLumis', 7223),
        )
        for key, expected in datasetExpectations:
            self.assertEqual(datasetSummary[key], expected)

        blockSummary = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        blockExpectations = (
            ('path', ''),
            ('block', BLOCK),
            ('NumberOfEvents', 377),
            ('NumberOfBlocks', 1),
            ('FileSize', 150780132),
            ('file_size', 150780132),
            ('NumberOfFiles', 2),
            ('NumberOfLumis', 94),
        )
        for key, expected in blockExpectations:
            self.assertEqual(blockSummary[key], expected)

        # unknown dataset and unknown block both raise
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        allBlocks = self.dbs.getFileBlocksInfo(DATASET)
        selected = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(selected))
        blockInfo = selected[0]
        self.assertEqual(46, len(allBlocks))
        self.assertTrue(blockInfo['Name'] in [entry['Name'] for entry in allBlocks])
        self.assertEqual(BLOCK, blockInfo['Name'])
        self.assertEqual(0, blockInfo['OpenForWriting'])
        self.assertEqual(150780132, blockInfo['BlockSize'])
        self.assertEqual(2, blockInfo['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        cernNodes = [node['Name'] for node in blockInfo['PhEDExNodeList']
                     if node['Name'].find('CH_CERN') >= 0]
        self.assertTrue(cernNodes)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET,
                          blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blockNames = self.dbs.listFileBlocks(DATASET)
        self.assertTrue(BLOCK in blockNames)
        # block is closed
        closedBlocks = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)
        firstClosed = closedBlocks[0]
        self.assertEqual(firstClosed, BLOCK)
        self.assertTrue(BLOCK in firstClosed)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        openBlocks = self.dbs.listOpenFileBlocks(DATASET)
        self.assertFalse(openBlocks)

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [entry['LogicalFileName'] for entry in self.dbs.listFilesInBlock(BLOCK)]
        self.assertTrue(FILE in lfns)
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(4, len(files))
        # the block name is checked under both key spellings
        self.assertEqual(recoBlock, files[0]['block_name'])
        self.assertEqual(recoBlock, files[0]['BlockName'])
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [entry['logical_file_name'] for entry in self.dbs.lfnsInBlock(BLOCK)]
        self.assertTrue(FILE in lfns)
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                     + 'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e')
        DBS_BLOCK2 = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                      + 'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab')
        # this test talks to the phys03 instance rather than self.endpoint
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        # assume one site is cern
        cernSites = [site for site in self.dbs.listFileBlockLocation(BLOCK)
                     if site and site.find('CH_CERN') >= 0]
        self.assertTrue(cernSites)

        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))

        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))

        # test bulk call: (expected number of results, blocks queried)
        bulkCases = (
            (2, [BLOCK, BLOCK2]),             # two blocks in phedex
            (1, [BLOCK, WRONG_BLOCK]),        # one block in phedex one does not exist
            (2, [BLOCK, DBS_BLOCK]),          # one in phedex one in dbs
            (2, [DBS_BLOCK, DBS_BLOCK2]),     # two in dbs
            (1, [DBS_BLOCK, WRONG_BLOCK]),    # one in DBS and one does not exist
        )
        for expectedCount, blockList in bulkCases:
            self.assertEqual(expectedCount, len(self.dbs.listFileBlockLocation(blockList)))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        result = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(result), 1)
        blockInfo = result[BLOCK]
        self.assertEqual(2, len(blockInfo['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        self.dbs = DBSReader(self.endpoint)
        result = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(result), 1)
        blockInfo = result[recoBlock]
        self.assertEqual(
            '/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/1043E89F-2DCF-E411-9CAE-02163E013751.root',
            blockInfo['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        result = self.dbs.getFiles(DATASET)
        self.assertEqual(len(result), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        rawBlock = '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual(rawBlock, parents[0]['Name'])
        cernNodes = [node for node in parents[0]['PhEDExNodeList'] if node.find("CH_CERN") >= 0]
        self.assertTrue(cernNodes)

        # asking for the parents of the RAW block itself gives a falsy result
        self.assertFalse(self.dbs.listBlockParents(rawBlock))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        isOpen = self.dbs.blockIsOpen(BLOCK)
        self.assertFalse(isOpen)

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
class DBSReaderTest(unittest.TestCase):
    """
    Checks DBSReader query results against fixed reference values.

    Every test builds its own DBSReader from the endpoint stored in setUp
    and is tagged "integration" through the `attr` decorator.
    """

    def setUp(self):
        """
        _setUp_

        Store the DBS endpoint URL used by the tests; no reader is created here.
        """
        # Older endpoint, kept for reference:
        # "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        # a pattern matching nothing yields a falsy result
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        # The block-level query must go through listRunLumis as well: the
        # expectation is a {run: lumi count} mapping, whereas listRuns for the
        # same arguments is asserted to return the plain list [173657] in
        # testlistRuns.
        runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        # an unknown tier or an unknown primary dataset gives a falsy result
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    # Tag spelled "integration" like every other test in this class (it used to
    # read "integrtion", which left this test outside that tag).
    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        # numeric fields are compared as strings here
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        # unknown dataset and unknown block both raise
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        #self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList']
                 if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # the unfiltered listing was fetched but never checked before
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)]
        self.assertTrue(FILE in lfns)
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(1, len(files))
        self.assertEqual(recoBlock, files[0]['Block']['Name'])
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find('cern.ch') > -1]
        self.assertTrue(sites)

        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(block), 1)
        block = block[recoBlock]
        self.assertEqual(
            '/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
            block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        # a block name that does not exist gives a falsy result rather than raising
        self.assertFalse(self.dbs.listBlockParents(recoBlock + 'dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
class DBSReaderTest(unittest.TestCase):
    """
    Tests for the DBSReader wrapper.

    Every test talks to the DBS endpoint configured in setUp and compares
    the answers with values hard-coded for the module-level DATASET, BLOCK
    and FILE fixtures. All tests carry the "integration" attribute so they
    can be selected or excluded as a group.
    """

    def setUp(self):
        """
        _setUp_

        Create the DBSReader instance used by every test.
        """
        # Legacy endpoint, kept for reference:
        # endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = DBSReader(endpoint)
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    # The tag used to be misspelled ("integrtion"), so attribute-based
    # selection of integration tests silently missed this test.
    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        # NumberOfEvents check was disabled in the original test:
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        blocks = self.dbs.listFileBlocks(DATASET)
        # The dataset-wide listing used to be fetched but never checked.
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(1, len(files))
        self.assertEqual(recoBlock, files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x.find('cern.ch') > -1]
        self.assertTrue(sites)
        # doesn't raise on non-existent block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + 'blah'))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        block = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(block), 1)
        block = block[recoBlock]
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        # An unknown block name yields an empty (falsy) result here.
        self.assertFalse(self.dbs.listBlockParents(recoBlock + 'dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
class DBSReaderTest(EmulatedUnitTestCase):
    """
    Tests for the DBSReader wrapper, built on EmulatedUnitTestCase.

    Each test creates its own DBSReader and compares the answers with
    values hard-coded for the module-level DATASET, BLOCK and FILE
    fixtures. Only some of the tests carry the "integration" attribute.
    """

    def setUp(self):
        """
        _setUp_

        Record the DBS endpoint; the reader itself is created by each test.
        """
        # Legacy endpoint, kept for reference:
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        super(DBSReaderTest, self).setUp()

    def tearDown(self):
        """
        _tearDown_

        Delegate to the base class tearDown.
        """
        super(DBSReaderTest, self).tearDown()

    @attr("integration")
    def testListDatatiers(self):
        """
        listDatatiers returns all datatiers available
        """
        self.dbs = DBSReader(self.endpoint)
        tiers = self.dbs.listDatatiers()
        for knownTier in ('RAW', 'GEN-SIM-RECO', 'GEN-SIM'):
            self.assertIn(knownTier, tiers)
        self.assertNotIn('RAW-ALAN', tiers)

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        primaries = self.dbs.listPrimaryDatasets('Jet*')
        for knownPrimary in ('Jet', 'JetMET', 'JetMETTau'):
            self.assertIn(knownPrimary, primaries)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        matches = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(matches))
        firstMatch = matches[0]
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], firstMatch['PathList'])
        self.assertEqual('Run2011A-v1', firstMatch['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)

        datasetRuns = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(datasetRuns))
        self.assertIn(174074, datasetRuns)

        blockRuns = self.dbs.listRuns(block=BLOCK)
        self.assertEqual(1, len(blockRuns))
        self.assertEqual([173657], blockRuns)

    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts (None for DBS3)"""
        self.dbs = DBSReader(self.endpoint)

        datasetRuns = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(datasetRuns))
        self.assertIn(173692, datasetRuns)
        self.assertEqual(datasetRuns[173692], None)

        blockRuns = self.dbs.listRunLumis(block=BLOCK)
        self.assertEqual(1, len(blockRuns))
        self.assertIn(173657, blockRuns)
        self.assertEqual(blockRuns[173657], None)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        processed = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertIn('Run2011A-v1', processed)
        self.assertIn('Run2011B-v1', processed)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        lfns = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(lfns))
        self.assertIn(FILE, lfns)

    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        testFile = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        self.dbs = DBSReader(self.endpoint)
        details = self.dbs.listDatasetFileDetails(DATASET)
        self.assertEqual(len(details), 49)
        self.assertIn(testFile, details)

        fileInfo = details[testFile]
        self.assertEqual(fileInfo['NumberOfEvents'], 545)
        self.assertEqual(fileInfo['file_size'], 286021145)
        self.assertEqual(fileInfo['BlockName'],
                         '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
        # Each checksum is checked under both of its key spellings.
        self.assertEqual(fileInfo['Md5'], 'NOTSET')
        self.assertEqual(fileInfo['md5'], 'NOTSET')
        self.assertEqual(fileInfo['Adler32'], 'a41a1446')
        self.assertEqual(fileInfo['adler32'], 'a41a1446')
        self.assertEqual(fileInfo['Checksum'], '22218315')
        self.assertEqual(fileInfo['check_sum'], '22218315')

        self.assertIn(173658, fileInfo['Lumis'])
        # Run 173658 is expected to hold lumi sections 1 through 111.
        self.assertEqual(sorted(fileInfo['Lumis'][173658]), list(range(1, 112)))

    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)

        datasetSummary = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(datasetSummary['path'], DATASET)
        self.assertEqual(datasetSummary['block'], '')
        self.assertEqual(datasetSummary['NumberOfEvents'], 22075)
        self.assertEqual(datasetSummary['NumberOfBlocks'], 46)
        self.assertEqual(datasetSummary['FileSize'], 4001680824)
        self.assertEqual(datasetSummary['file_size'], 4001680824)
        self.assertEqual(datasetSummary['NumberOfFiles'], 49)
        self.assertEqual(datasetSummary['NumberOfLumis'], 7223)

        blockSummary = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(blockSummary['path'], '')
        self.assertEqual(blockSummary['block'], BLOCK)
        self.assertEqual(blockSummary['NumberOfEvents'], 377)
        self.assertEqual(blockSummary['NumberOfBlocks'], 1)
        self.assertEqual(blockSummary['FileSize'], 150780132)
        self.assertEqual(blockSummary['file_size'], 150780132)
        self.assertEqual(blockSummary['NumberOfFiles'], 2)
        self.assertEqual(blockSummary['NumberOfLumis'], 94)

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        allBlocks = self.dbs.getFileBlocksInfo(DATASET)
        matched = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(matched))
        blockInfo = matched[0]
        self.assertEqual(46, len(allBlocks))
        self.assertIn(blockInfo['Name'], [entry['Name'] for entry in allBlocks])
        self.assertEqual(BLOCK, blockInfo['Name'])
        self.assertEqual(0, blockInfo['OpenForWriting'])
        self.assertEqual(150780132, blockInfo['BlockSize'])
        self.assertEqual(2, blockInfo['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        cernNodes = [node['Name'] for node in blockInfo['PhEDExNodeList']
                     if node['Name'].find('CH_CERN') >= 0]
        self.assertTrue(cernNodes)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET,
                          blockName=BLOCK + 'asas')

    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blockNames = self.dbs.listFileBlocks(DATASET)
        self.assertIn(BLOCK, blockNames)
        # block is closed
        closedBlocks = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)
        firstClosed = closedBlocks[0]
        self.assertEqual(firstClosed, BLOCK)
        self.assertIn(BLOCK, firstClosed)

    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.blockExists, DATASET + '#somethingelse')

    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [entry['LogicalFileName'] for entry in self.dbs.listFilesInBlock(BLOCK)]
        self.assertIn(FILE, lfns)
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        parentLfn = ('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/'
                     '1043E89F-2DCF-E411-9CAE-02163E013751.root')
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(4, len(files))
        firstFile = files[0]
        self.assertEqual(recoBlock, firstFile['block_name'])
        self.assertEqual(recoBlock, firstFile['BlockName'])
        self.assertEqual(parentLfn, firstFile['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [entry['logical_file_name'] for entry in self.dbs.lfnsInBlock(BLOCK)]
        self.assertIn(FILE, lfns)
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        wrongBlock = BLOCK[:-4] + 'abcd'
        secondBlock = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        dbsBlock = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                    'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e')
        secondDbsBlock = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                          'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab')
        # This test uses the phys03 instance rather than self.endpoint.
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')

        # assume one site is cern
        cernSites = [site for site in self.dbs.listFileBlockLocation(BLOCK)
                     if site and site.find('CH_CERN') >= 0]
        self.assertTrue(cernSites)
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(dbsBlock))
        # doesn't raise on non-existent block
        self.assertFalse(self.dbs.listFileBlockLocation(wrongBlock))

        # test bulk call:
        # two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, secondBlock])))
        # one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, wrongBlock])))
        # one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, dbsBlock])))
        # two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([dbsBlock, secondDbsBlock])))
        # one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([dbsBlock, wrongBlock])))

    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        blockMap = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(blockMap), 1)
        blockInfo = blockMap[BLOCK]
        self.assertEqual(2, len(blockInfo['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        parentLfn = ('/store/data/Commissioning2015/Cosmics/RAW/v1/000/238/545/00000/'
                     '1043E89F-2DCF-E411-9CAE-02163E013751.root')
        self.dbs = DBSReader(self.endpoint)
        blockMap = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(blockMap), 1)
        blockInfo = blockMap[recoBlock]
        self.assertEqual(parentLfn, blockInfo['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        fileMap = self.dbs.getFiles(DATASET)
        self.assertEqual(len(fileMap), 46)

    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = '/Cosmics/Commissioning2015-PromptReco-v1/RECO#004ac3ba-d09e-11e4-afad-001e67ac06a0'
        rawBlock = '/Cosmics/Commissioning2015-v1/RAW#942d76fe-cf0e-11e4-afad-001e67ac06a0'
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual(rawBlock, parents[0]['Name'])
        cernNodes = [node for node in parents[0]['PhEDExNodeList'] if node.find("CH_CERN") >= 0]
        self.assertTrue(cernNodes)

        # The RAW block is expected to have no parents of its own.
        self.assertFalse(self.dbs.listBlockParents(rawBlock))

    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertRaises(DBSReaderError, self.dbs.blockToDatasetPath, BLOCK + 'asas')
class DBSReaderTest(unittest.TestCase):
    """
    Tests for the DBSReader wrapper.

    Each test creates its own DBSReader (most against the endpoint recorded
    in setUp) and compares the answers with values hard-coded for the
    module-level DATASET, BLOCK and FILE fixtures. All tests carry the
    "integration" attribute so they can be selected or excluded as a group.
    """

    def setUp(self):
        """
        _setUp_

        Record the DBS endpoint; the reader itself is created by each test.
        """
        # Legacy endpoint, kept for reference:
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        self.dbs = None
        return

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets('Jet*')
        self.assertTrue('Jet' in results)
        self.assertTrue('JetMET' in results)
        self.assertTrue('JetMETTau' in results)
        self.assertFalse(self.dbs.listPrimaryDatasets('DoesntExist'))
        return

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v1')
        self.assertEqual(1, len(dataset))
        self.assertEqual(['/Jet/Run2011A-v1/RAW'], dataset[0]['PathList'])
        self.assertEqual('Run2011A-v1', dataset[0]['Name'])
        self.assertFalse(self.dbs.matchProcessedDatasets('Jet', 'RAW', 'Run2011A-v666'))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(174074 in runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertTrue(173692 in runs)
        self.assertEqual(runs[173692], 2782)
        # This used to call listRuns, which testlistRuns shows returning the
        # list [173657] for the same arguments, so the comparison against a
        # run -> lumi-count dict could never succeed.
        runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets('Jet', 'RAW')
        self.assertTrue('Run2011A-v1' in datasets)
        self.assertTrue('Run2011B-v1' in datasets)
        self.assertFalse(self.dbs.listProcessedDatasets('Jet', 'blah'))
        self.assertFalse(self.dbs.listProcessedDatasets('blah', 'RAW'))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertTrue(FILE in files)

    @attr("integration")
    def testlistDatasetFileDetails(self):
        """testlistDatasetFilesDetails returns lumis, events, and parents of a dataset"""
        TESTFILE = '/store/data/Run2011A/HighPileUp/RAW/v1/000/173/658/56484BAB-CBCB-E011-AF00-BCAEC518FF56.root'
        # NOTE(review): the second entry looks like a file path rather than a
        # DBS URL - confirm it is the endpoint that was intended.
        # The same expectations are checked once per endpoint.
        for endpoint in [self.endpoint, 'test/python/WMCore_t/Services_t/DBS_t/DBSReader_t.py:']:
            self.dbs = DBSReader(endpoint)
            details = self.dbs.listDatasetFileDetails(DATASET)
            self.assertEqual(len(details), 49)
            self.assertTrue(TESTFILE in details)
            self.assertEqual(details[TESTFILE]['NumberOfEvents'], 545)
            self.assertEqual(details[TESTFILE]['Size'], 286021145)
            self.assertEqual(details[TESTFILE]['BlockName'],
                             '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace')
            self.assertEqual(details[TESTFILE]['Checksums'],
                             {'Checksum': '22218315', 'Adler32': 'a41a1446', 'Md5': 'NOTSET'})
            self.assertTrue(173658 in details[TESTFILE]['Lumis'])
            # Both sides are sorted, so only the set of lumi numbers matters.
            self.assertEqual(
                sorted(details[TESTFILE]['Lumis'][173658]),
                sorted(map(long,
                           [8, 12, 9, 14, 10, 6, 2, 1, 4, 3, 36, 49, 16, 11, 27, 35, 46, 39, 20, 24, 52, 23, 40, 42, 45, 21, 32, 37,
                            25, 22, 5, 33, 17, 15, 26, 50, 18, 29, 51, 44, 69, 43, 30, 73, 19, 41, 13, 38, 7, 31, 75, 48, 59, 65, 55,
                            57, 34, 28, 74, 47, 64, 61, 68, 77, 66, 71, 60, 76, 70, 67, 62, 78, 82, 79, 88, 56, 101, 92, 58, 72, 54,
                            63, 96, 53, 84, 95, 89, 85, 99, 81, 91, 102, 80, 100, 107, 94, 93, 90, 86, 87, 83, 97, 104, 110, 111, 106,
                            108, 98, 103, 109, 105]))
            )

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset['path'], DATASET)
        self.assertEqual(dataset['block'], '')
        self.assertEqual(dataset['NumberOfEvents'], '22075')
        self.assertEqual(dataset['NumberOfBlocks'], '46')
        self.assertEqual(dataset['total_size'], '4001680824')
        self.assertEqual(dataset['NumberOfFiles'], '49')
        self.assertEqual(dataset['NumberOfLumis'], '7223')

        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block['path'], '')
        self.assertEqual(block['block'], BLOCK)
        self.assertEqual(block['NumberOfEvents'], '377')
        self.assertEqual(block['NumberOfBlocks'], '1')
        self.assertEqual(block['total_size'], '150780132')
        self.assertEqual(block['NumberOfFiles'], '2')
        self.assertEqual(block['NumberOfLumis'], '94')

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + 'blah')
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertTrue(block['Name'] in [x['Name'] for x in blocks])
        self.assertEqual(BLOCK, block['Name'])
        # NumberOfEvents check was disabled in the original test:
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block['BlockSize'])
        self.assertEqual(2, block['NumberOfFiles'])
        # possibly fragile but assume block located at least at cern
        sites = [x['Name'] for x in block['StorageElementList'] if x['Name'].find('cern.ch') > -1]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + 'blah')
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + 'asas'))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.listFileBlocks(DATASET)
        # The dataset-wide listing used to be fetched but never checked.
        self.assertTrue(BLOCK in blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)
        self.assertTrue(BLOCK in block)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + '#somethingelse'))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in [x['LogicalFileName'] for x in self.dbs.listFilesInBlock(BLOCK)])
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + '#blah')

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(1, len(files))
        self.assertEqual(recoBlock, files[0]['Block']['Name'])
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         files[0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(FILE in self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + 'asas')

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        WRONG_BLOCK = BLOCK[:-4] + 'abcd'
        BLOCK2 = '/HighPileUp/Run2011A-v1/RAW#6021175e-cbfb-11e0-80a9-003048caaace'
        DBS_BLOCK = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                     'ea0972193530f531086947d06eb0f121/USER#fb978442-a61b-413a-b4f4-526e6cdb142e')
        DBS_BLOCK2 = ('/GenericTTbar/hernan-140317_231446_crab_JH_ASO_test_T2_ES_CIEMAT_5000_100_140318_0014-'
                      'ea0972193530f531086947d06eb0f121/USER#0b04d417-d734-4ef2-88b0-392c48254dab')
        # This test uses the phys03 instance rather than self.endpoint.
        self.dbs = DBSReader('https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/')
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if x and x.find('cern.ch') > -1]
        self.assertTrue(sites)
        # This block is only found on DBS
        self.assertTrue(self.dbs.listFileBlockLocation(DBS_BLOCK))
        # doesn't raise on non-existent block
        self.assertFalse(self.dbs.listFileBlockLocation(WRONG_BLOCK))
        # test bulk call:
        # two blocks in phedex
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, BLOCK2])))
        # one block in phedex one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([BLOCK, WRONG_BLOCK])))
        # one in phedex one in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([BLOCK, DBS_BLOCK])))
        # two in dbs
        self.assertEqual(2, len(self.dbs.listFileBlockLocation([DBS_BLOCK, DBS_BLOCK2])))
        # one in DBS and one does not exist
        self.assertEqual(1, len(self.dbs.listFileBlockLocation([DBS_BLOCK, WRONG_BLOCK])))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block['Files']))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + 'asas')

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(block), 1)
        block = block[recoBlock]
        self.assertEqual('/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root',
                         block['Files'][0]['ParentList'][0]['LogicalFileName'])

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + 'asas')

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = '/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60'
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual('/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60',
                         parents[0]['Name'])
        sites = [x for x in parents[0]['StorageElementList'] if x.find("cern.ch") > -1]
        self.assertTrue(sites)

        # An unknown block name yields an empty (falsy) result here.
        self.assertFalse(self.dbs.listBlockParents(recoBlock + 'dsl'))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + 'asas'))
class WMBSHelperTest(EmulatedUnitTestCase):
    """
    Exercise WMBSHelper (fileset/subscription creation from a WMSpec) and
    killWorkflow against the database and CouchDB instances configured in
    setUp.
    """

    def setUp(self):
        """
        _setUp_

        Create the couch databases and WMBS schema, build the default ReReco
        spec and keep handles to its top level task, input dataset and DBS
        reader.
        """
        super(WMBSHelperTest, self).setUp()

        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
        self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        os.environ["COUCHDB"] = "wmbshelper_t"
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMComponent.DBS3Buffer",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl"],
                                useDefault=False)

        self.workDir = self.testInit.generateWorkDir()

        self.wmspec = self.createWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = DBSReader(self.inputDataset.dbsurl)
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=threading.currentThread().logger,
                                     dbinterface=threading.currentThread().dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database, the couch instances and the work directory.
        """
        self.testInit.clearDatabase()
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        super(WMBSHelperTest, self).tearDown()
        return

    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task
        and a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create
        jobs for each subscription in various states.

        If baAPI is given, the jobs are additionally registered with it via
        createNewJobs().
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        dummyLocationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                                   ceName='site1', plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='site1', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT')

        # Input fileset of the processing subscription
        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        # Unmerged fileset shared by the merge and cleanup subscriptions
        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1", owner="Steve",
                                    name="Main", task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve",
                                         name="Main", task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve",
                                       name="Main", task="Cleanup")
        mainCleanupWorkflow.create()

        # Processing subscription: A acquired, B completed, C left available
        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()

        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        # Merge subscription: A acquired, B failed, C left available
        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()

        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        # Cleanup subscription: A acquired, B completed, C left available
        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()

        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [self.procJobA, self.procJobB, self.procJobC,
                   self.mergeJobA, self.mergeJobB, self.mergeJobC,
                   self.cleanupJobA, self.cleanupJobB, self.cleanupJobC]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2", owner="Steve",
                                 name="Bogus", task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return

    def verifyFileKillStatus(self):
        """
        _verifyFileKillStatus_

        Verify that all files were killed correctly.  The status of files in
        Cleanup and LogCollect subscriptions isn't modified.  Status of
        already completed and failed files is not modified.  Also verify that
        the bogus subscription is untouched.
        """
        # Processing subscription: every input file ends up completed
        failedFiles = self.mainProcSub.filesOfStatus("Failed")
        acquiredFiles = self.mainProcSub.filesOfStatus("Acquired")
        completedFiles = self.mainProcSub.filesOfStatus("Completed")
        availableFiles = self.mainProcSub.filesOfStatus("Available")
        bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired")

        self.assertEqual(len(availableFiles), 0,
                         "Error: There should be no available files.")
        self.assertEqual(len(acquiredFiles), 0,
                         "Error: There should be no acquired files.")
        self.assertEqual(len(bogusAcquiredFiles), 1,
                         "Error: There should be one acquired file.")
        self.assertEqual(len(completedFiles), 3,
                         "Error: There should be three completed files.")

        goldenLFNs = ["lfnA", "lfnB", "lfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs,
                            "Error: Extra completed file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(failedFiles), 0,
                         "Error: There should be no failed files.")
        self.assertEqual(len(goldenLFNs), 0,
                         "Error: Missing LFN")

        # Merge subscription: the failed file stays failed, the rest complete
        failedFiles = self.mainMergeSub.filesOfStatus("Failed")
        acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired")
        completedFiles = self.mainMergeSub.filesOfStatus("Completed")
        availableFiles = self.mainMergeSub.filesOfStatus("Available")

        self.assertEqual(len(acquiredFiles), 0,
                         "Error: Merge subscription should have 0 acq files.")
        self.assertEqual(len(availableFiles), 0,
                         "Error: Merge subscription should have 0 avail files.")
        self.assertEqual(len(failedFiles), 1,
                         "Error: Merge subscription should have 1 failed files.")
        self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB",
                         "Error: Wrong failed file.")
        self.assertEqual(len(completedFiles), 2,
                         "Error: Merge subscription should have 2 compl files.")

        goldenLFNs = ["ulfnA", "ulfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs,
                            "Error: Extra complete file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(goldenLFNs), 0,
                         "Error: Missing LFN")

        # Cleanup subscription: must be exactly as setupForKillTest left it
        failedFiles = self.mainCleanupSub.filesOfStatus("Failed")
        acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired")
        completedFiles = self.mainCleanupSub.filesOfStatus("Completed")
        availableFiles = self.mainCleanupSub.filesOfStatus("Available")

        self.assertEqual(len(failedFiles), 0,
                         "Error: Cleanup subscription should have 0 fai files.")
        self.assertEqual(len(acquiredFiles), 1,
                         "Error: There should be only one acquired file.")
        self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA",
                         "Error: Wrong acquired LFN.")
        self.assertEqual(len(completedFiles), 1,
                         "Error: There should be only one completed file.")
        self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB",
                         "Error: Wrong completed LFN.")
        self.assertEqual(len(availableFiles), 1,
                         "Error: There should be only one available file.")
        self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC",
                         "Error: Wrong available LFN.")
        return

    def verifyJobKillStatus(self):
        """
        _verifyJobKillStatus_

        Verify that jobs are killed correctly.  Jobs belonging to Cleanup and
        LogCollect subscriptions are not killed.  The status of jobs that have
        already finished running is not changed.
        """
        self.procJobA.load()
        self.procJobB.load()
        self.procJobC.load()

        self.assertEqual(self.procJobA["state"], "killed",
                         "Error: Proc job A should be killed.")
        self.assertEqual(self.procJobB["state"], "killed",
                         "Error: Proc job B should be killed.")
        self.assertEqual(self.procJobC["state"], "complete",
                         "Error: Proc job C should be complete.")

        self.mergeJobA.load()
        self.mergeJobB.load()
        self.mergeJobC.load()

        self.assertEqual(self.mergeJobA["state"], "exhausted",
                         "Error: Merge job A should be exhausted.")
        self.assertEqual(self.mergeJobB["state"], "cleanout",
                         "Error: Merge job B should be cleanout.")
        self.assertEqual(self.mergeJobC["state"], "killed",
                         "Error: Merge job C should be killed.")

        self.cleanupJobA.load()
        self.cleanupJobB.load()
        self.cleanupJobC.load()

        self.assertEqual(self.cleanupJobA["state"], "new",
                         "Error: Cleanup job A should be new.")
        self.assertEqual(self.cleanupJobB["state"], "executing",
                         "Error: Cleanup job B should be executing.")
        self.assertEqual(self.cleanupJobC["state"], "complete",
                         "Error: Cleanup job C should be complete.")
        return

    def createTestWMSpec(self):
        """
        _createTestWMSpec_

        Create a WMSpec that has a processing, merge, cleanup and skims tasks
        that can be used by the subscription creation test.

        Returns the WMWorkloadHelper wrapping the new workload.
        """
        testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        testWorkload.setDashboardActivity("TestReReco")
        testWorkload.setSpecUrl("/path/to/workload")
        testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'})

        procTask = testWorkload.newTask("ProcessingTask")
        procTask.setTaskType("Processing")
        procTask.setSplittingAlgorithm("FileBased", files_per_job=1)
        procTaskCMSSW = procTask.makeStep("cmsRun1")
        procTaskCMSSW.setStepType("CMSSW")
        procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper()
        procTask.setTaskType("Processing")
        procTask.setSiteWhitelist(["site1"])
        procTask.setSiteBlacklist(["site2"])
        procTask.applyTemplates()

        procTaskCMSSWHelper.addOutputModule("OutputA",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)

        mergeTask = procTask.addTask("MergeTask")
        mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA")
        mergeTask.setTaskType("Merge")
        mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1,
                                        max_merge_size=2, max_merge_events=3)
        mergeTaskCMSSW = mergeTask.makeStep("cmsRun1")
        mergeTaskCMSSW.setStepType("CMSSW")
        mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper()
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()

        mergeTaskCMSSWHelper.addOutputModule("Merged",
                                             primaryDataset="bogusPrimary",
                                             processedDataset="bogusProcessed",
                                             dataTier="DataTierA",
                                             lfnBase="bogusUnmerged",
                                             mergedLFNBase="bogusMerged",
                                             filterName=None)

        cleanupTask = procTask.addTask("CleanupTask")
        cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA")
        # NOTE(review): the type is set to "Merge" here and overwritten with
        # "Cleanup" a few lines below - confirm the first value is intended.
        cleanupTask.setTaskType("Merge")
        cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50)
        cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1")
        cleanupTaskCMSSW.setStepType("CMSSW")
        dummyCleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper()
        cleanupTask.setTaskType("Cleanup")
        cleanupTask.applyTemplates()

        skimTask = mergeTask.addTask("SkimTask")
        skimTask.setTaskType("Skim")
        skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged")
        skimTask.setSplittingAlgorithm("FileBased", files_per_job=1,
                                       include_parents=True)
        skimTaskCMSSW = skimTask.makeStep("cmsRun1")
        skimTaskCMSSW.setStepType("CMSSW")
        skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper()
        skimTask.setTaskType("Skim")
        skimTask.applyTemplates()

        skimTaskCMSSWHelper.addOutputModule("SkimOutputA",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)

        skimTaskCMSSWHelper.addOutputModule("SkimOutputB",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)
        return testWorkload

    def setupMCWMSpec(self):
        """Setup MC workflow"""
        self.wmspec = self.createMCWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = None
        self.siteDB = SiteDBJSON()

        # add sites that would normally be added by operator via resource_control
        locationDAO = self.daoFactory(classname="Locations.New")
        self.pnns = []
        for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
            # Resolve the node name once and reuse it for both the DAO and
            # the list checked later by testMCFakeFileInjection.
            pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]
            locationDAO.execute(siteName=site, pnn=pnn)
            self.pnns.append(pnn)

    def createWMSpec(self, name='ReRecoWorkload'):
        """
        Build a ReReco workload called *name* from the module level
        rerecoArgs (which gets a freshly created ConfigCacheID) and return it.
        """
        factory = ReRecoWorkloadFactory()
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs)
        wmspec.setSpecUrl("/path/to/workload")
        wmspec.setSubscriptionInformation(custodialSites=[],
                                          nonCustodialSites=[],
                                          autoApproveSites=[],
                                          priority="Low",
                                          custodialSubType="Move")
        return wmspec

    def createMCWMSpec(self, name='MonteCarloWorkload'):
        """
        Build a Monte Carlo workload called *name* from the module level
        mcArgs, reusing the couch database name of rerecoArgs, and return it.
        """
        mcArgs['CouchDBName'] = rerecoArgs["CouchDBName"]
        mcArgs["ConfigCacheID"] = createConfig(mcArgs["CouchDBName"])

        wmspec = monteCarloWorkload(name, mcArgs)
        wmspec.setSpecUrl("/path/to/workload")
        getFirstTask(wmspec).addProduction(totalevents=10000)
        return wmspec

    def getDBS(self, wmspec):
        """Return a DBSReader for the DBS URL of *wmspec*'s top level input dataset."""
        topLevelTask = getFirstTask(wmspec)
        inputDataset = topLevelTask.inputDataset()
        dbs = DBSReader(inputDataset.dbsurl)
        return dbs

    def createWMBSHelperWithTopTask(self, wmspec, block, mask=None,
                                    parentFlag=False, detail=False):
        """
        Create a WMBSHelper for the top level task of *wmspec* and let it
        create its subscription and add files.

        When *block* is set, the block is first looked up through self.dbs
        (with parents if *parentFlag* is set).  Returns the helper, or the
        tuple (helper, subscription, files) when *detail* is set, where the
        last two are whatever createSubscriptionAndAddFiles() returned.
        """
        topLevelTask = getFirstTask(wmspec)

        wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask,
                          cachepath=self.workDir)
        if block:
            if parentFlag:
                block = self.dbs.getFileBlockWithParents(block)[block]
            else:
                block = self.dbs.getFileBlock(block)[block]
        sub, files = wmbs.createSubscriptionAndAddFiles(block=block)
        if detail:
            return wmbs, sub, files
        else:
            return wmbs

    def testKillWorkflow(self):
        """
        _testKillWorkflow_

        Verify that workflow killing works correctly.
        """
        configFile = EmulatorSetup.setupWMAgentConfig()
        try:
            config = loadConfigurationFile(configFile)

            baAPI = BossAirAPI(config=config)

            # Create nine jobs
            self.setupForKillTest(baAPI=baAPI)
            self.assertEqual(len(baAPI._listRunJobs()), 9)
            killWorkflow("Main", config, config)

            self.verifyFileKillStatus()
            self.verifyJobKillStatus()
            self.assertEqual(len(baAPI._listRunJobs()), 8)
        finally:
            # Remove the generated config even when an assertion above fails
            EmulatorSetup.deleteConfig(configFile)
        return

    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                                   ceName='site1', plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch',
                                   ceName='site2', plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(),
                                    "SomeBlock", cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(procWorkflow.spec,
                         os.path.join(self.workDir, procWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(mergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec,
                         os.path.join(self.workDir, mergeWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec,
                         os.path.join(self.workDir, cleanupWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec,
                         os.path.join(self.workDir, skimWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return

    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        Truncate the test workload at its MergeTask and verify the
        workflows, filesets and subscriptions created for the resubmission.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                                   ceName='site1', plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2', pnn='goodse2.cern.ch',
                                   ceName='site2', plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(),
                                    "SomeBlock", cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(),
                                                "SomeBlock2", cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper._createSubscriptionsInWMBS(task,
                                                              testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec,
                         os.path.join(self.workDir, mergeWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name="ResubmitTestWorkload",
                                task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec,
                         os.path.join(self.workDir, skimWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return

    def testReReco(self):
        """ReReco workflow"""
        # create workflow
        block = self.dataset + "#" + BLOCK1
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        # NOTE(review): unlike the sibling tests this hands validFiles() the
        # whole getFileBlock() result instead of its [block]['Files'] list -
        # confirm that this is intended.
        files = wmbs.validFiles(self.dbs.getFileBlock(block))
        self.assertEqual(len(files), 1)

    def testReRecoBlackRunRestriction(self):
        """ReReco workflow with Run restrictions"""
        block = self.dataset + "#" + BLOCK2
        # Set run blacklist to only run in the block
        self.topLevelTask.setInputRunBlacklist([181183])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)

        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testReRecoWhiteRunRestriction(self):
        """ReReco workflow with a run whitelist matching the block's run"""
        block = self.dataset + "#" + BLOCK2
        # Set run whitelist to only run in the block
        self.topLevelTask.setInputRunWhitelist([181183])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 1)

    def testLumiMaskRestrictionsOK(self):
        """A run/lumi splitting mask set on the top level task keeps one file valid"""
        block = self.dataset + "#" + BLOCK1
        self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['181367']
        self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['57,80']
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 1)

    def testLumiMaskRestrictionsKO(self):
        """A run/lumi splitting mask set on the top level task leaves no valid file"""
        block = self.dataset + "#" + BLOCK1
        self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['123454321']
        self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['123,123']
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testDuplicateFileInsert(self):
        """Re-adding a block's files for a second spec reports zero newly added files"""
        # using default wmspec
        block = self.dataset + "#" + BLOCK1
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        # check initially inserted files.
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))
        firstFileset = wmbs.topLevelFileset
        wmbsDao = wmbs.daofactory(classname="Files.InFileset")

        numOfFiles = len(wmbsDao.execute(firstFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        # use the new spec with same inputdataset
        block = self.dataset + "#" + BLOCK1
        wmspec = self.createWMSpec("TestSpec1")
        dbs = self.getDBS(wmspec)
        wmbs = self.createWMBSHelperWithTopTask(wmspec, block)
        # check duplicate insert
        dbsFiles = dbs.getFileBlock(block)[block]['Files']
        numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block])
        self.assertEqual(numOfFiles, 0)
        secondFileset = wmbs.topLevelFileset

        wmbsDao = wmbs.daofactory(classname="Files.InFileset")
        numOfFiles = len(wmbsDao.execute(secondFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        self.assertNotEqual(firstFileset.id, secondFileset.id)

    def testDuplicateSubscription(self):
        """Can't duplicate subscriptions"""
        # using default wmspec
        block = self.dataset + "#" + BLOCK1
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        filesetId = wmbs.topLevelFileset.id
        subId = wmbs.topLevelSubscription['id']

        # check initially inserted files.
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))

        # reinsert subscription - shouldn't create anything new
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
        self.assertEqual(filesetId, wmbs.topLevelFileset.id)
        self.assertEqual(subId, wmbs.topLevelSubscription['id'])

        # now do a montecarlo workflow
        self.setupMCWMSpec()
        mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345,
                    LastEvent=999995, LastLumi=12345, LastRun=12)

        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        filesetId = wmbs.topLevelFileset.id
        subId = wmbs.topLevelSubscription['id']

        # check initially inserted files.
        numDbsFiles = 1
        self.assertEqual(numOfFiles, numDbsFiles)

        # reinsert subscription - shouldn't create anything new
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        wmbs.topLevelFileset.loadData()
        self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
        self.assertEqual(filesetId, wmbs.topLevelFileset.id)
        self.assertEqual(subId, wmbs.topLevelSubscription['id'])

    def testParentage(self):
        """
        1. check whether parent files are created in wmbs.
        2. check parent files are associated to child.
        3. When 2 specs with the same input data (one with parent processing,
           one without it) is inserted, if one without parent processing
           inserted first then the other with parent processing insert, it
           still needs to create parent files although child files are
           duplicate
        """
        # Swap out the dataset for one that has parents
        task = next(self.wmspec.taskIterator())
        oldDS = task.inputDataset()  # Copy the old dataset, only will use DBS URL from it
        task.addInputDataset(dbsurl=oldDS.dbsurl, primary='Cosmics',
                             processed='ComissioningHI-PromptReco-v1', tier='RECO')
        block = '/Cosmics/ComissioningHI-PromptReco-v1/RECO' + '#5b89ba9c-0dbf-11e1-9b6c-003048caaace'

        # File creation without parents
        wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block,
                                                             parentFlag=False, detail=True)
        self.assertEqual(8, numFiles)
        wmbs.topLevelFileset.loadData()
        for child in wmbs.topLevelFileset.files:
            self.assertEqual(len(child["parents"]), 0)  # no parents per child

        # File creation with parents
        wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block,
                                                             parentFlag=True, detail=True)
        self.assertEqual(8, numFiles)
        wmbs.topLevelFileset.loadData()
        for child in wmbs.topLevelFileset.files:
            self.assertEqual(len(child["parents"]), 1)  # one parent per child

    def testMCFakeFileInjection(self):
        """Inject fake Monte Carlo files into WMBS"""
        # This test is failing because the name of the couch DB is set to None
        # in TestMonteCarloWorkloadFactory.getMCArgs() but changing it to
        # "reqmgr_config_cache_t" from StdBase test arguments does not fix the
        # situation. testDuplicateSubscription probably has the same issue
        self.setupMCWMSpec()

        mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345,
                    LastEvent=999995, LastLumi=12345, LastRun=12)
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        subscription = wmbs.topLevelSubscription
        self.assertEqual(1, subscription.exists())
        fileset = subscription['fileset']
        self.assertEqual(1, fileset.exists())
        fileset.loadData()  # need to refresh from database

        self.assertEqual(len(fileset.files), 1)
        self.assertEqual(len(fileset.parents), 0)
        self.assertFalse(fileset.open)

        firstFile = list(fileset.files)[0]
        self.assertEqual(firstFile['events'],
                         mask['LastEvent'] - mask['FirstEvent'] + 1)  # inclusive range
        self.assertEqual(firstFile['merged'], False)  # merged files get added to dbs
        self.assertEqual(len(firstFile['parents']), 0)
        #firstFile.loadData()
        self.assertEqual(sorted(firstFile['locations']), sorted(self.pnns))
        self.assertEqual(len(firstFile.getParentLFNs()), 0)

        self.assertEqual(len(firstFile.getRuns()), 1)
        run = firstFile.getRuns()[0]
        self.assertEqual(run.run, mask['FirstRun'])
        self.assertEqual(run.lumis[0], mask['FirstLumi'])
        self.assertEqual(run.lumis[-1], mask['LastLumi'])
        self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class DBSReaderTest(unittest.TestCase):
    """Integration tests for DBSReader.

    Every test builds a DBSReader on the endpoint stored by setUp and compares
    its answers with values pinned to the DATASET, BLOCK and FILE constants
    defined elsewhere in this file. All tests are tagged ``integration``
    because they query that live endpoint.
    """

    def setUp(self):
        """
        _setUp_

        Record the DBS endpoint the tests query; each test creates its own
        DBSReader, so no reader is built here.
        """
        # Previous endpoint, kept for reference:
        # self.endpoint = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        self.endpoint = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        self.dbs = None

    @attr("integration")
    def testListPrimaryDatasets(self):
        """
        listPrimaryDatasets returns known primary datasets
        """
        self.dbs = DBSReader(self.endpoint)
        results = self.dbs.listPrimaryDatasets("Jet*")
        for primary in ("Jet", "JetMET", "JetMETTau"):
            self.assertIn(primary, results)
        self.assertFalse(self.dbs.listPrimaryDatasets("DoesntExist"))

    @attr("integration")
    def testMatchProcessedDatasets(self):
        """
        matchProcessedDatasets returns known processed datasets
        """
        self.dbs = DBSReader(self.endpoint)
        dataset = self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v1")
        self.assertEqual(1, len(dataset))
        self.assertEqual(["/Jet/Run2011A-v1/RAW"], dataset[0]["PathList"])
        self.assertEqual("Run2011A-v1", dataset[0]["Name"])
        self.assertFalse(self.dbs.matchProcessedDatasets("Jet", "RAW", "Run2011A-v666"))

    @attr("integration")
    def testlistRuns(self):
        """listRuns returns known runs"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRuns(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertIn(174074, runs)
        runs = self.dbs.listRuns(dataset=DATASET, block=BLOCK)
        self.assertEqual([173657], runs)

    @attr("integration")
    def testlistRunLumis(self):
        """listRunLumis returns known runs and lumicounts"""
        self.dbs = DBSReader(self.endpoint)
        runs = self.dbs.listRunLumis(dataset=DATASET)
        self.assertEqual(46, len(runs))
        self.assertIn(173692, runs)
        self.assertEqual(runs[173692], 2782)
        # The block-level query must also go through listRunLumis: listRuns
        # returns a plain list of run numbers (see testlistRuns) and could
        # never equal the {run: lumicount} mapping expected here.
        runs = self.dbs.listRunLumis(dataset=DATASET, block=BLOCK)
        self.assertEqual({173657: 94}, runs)

    @attr("integration")
    def testListProcessedDatasets(self):
        """listProcessedDatasets returns known processed datasets"""
        self.dbs = DBSReader(self.endpoint)
        datasets = self.dbs.listProcessedDatasets("Jet", "RAW")
        self.assertIn("Run2011A-v1", datasets)
        self.assertIn("Run2011B-v1", datasets)
        self.assertFalse(self.dbs.listProcessedDatasets("Jet", "blah"))
        self.assertFalse(self.dbs.listProcessedDatasets("blah", "RAW"))

    @attr("integration")
    def testlistDatasetFiles(self):
        """listDatasetFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listDatasetFiles(DATASET)
        self.assertEqual(49, len(files))
        self.assertIn(FILE, files)

    @attr("integration")
    def testGetDBSSummaryInfo(self):
        """getDBSSummaryInfo returns summary of dataset and block"""
        self.dbs = DBSReader(self.endpoint)

        # Dataset-level summary: the counters are compared as strings.
        dataset = self.dbs.getDBSSummaryInfo(DATASET)
        self.assertEqual(dataset["path"], DATASET)
        self.assertEqual(dataset["block"], "")
        self.assertEqual(dataset["NumberOfEvents"], "22075")
        self.assertEqual(dataset["NumberOfBlocks"], "46")
        self.assertEqual(dataset["total_size"], "4001680824")
        self.assertEqual(dataset["NumberOfFiles"], "49")
        self.assertEqual(dataset["NumberOfLumis"], "7223")

        # Block-level summary: "path" is empty and "block" is filled instead.
        block = self.dbs.getDBSSummaryInfo(DATASET, BLOCK)
        self.assertEqual(block["path"], "")
        self.assertEqual(block["block"], BLOCK)
        self.assertEqual(block["NumberOfEvents"], "377")
        self.assertEqual(block["NumberOfBlocks"], "1")
        self.assertEqual(block["total_size"], "150780132")
        self.assertEqual(block["NumberOfFiles"], "2")
        self.assertEqual(block["NumberOfLumis"], "94")

        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET + "blah")
        self.assertRaises(DBSReaderError, self.dbs.getDBSSummaryInfo, DATASET, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlocksInfo(self):
        """getFileBlocksInfo returns block info, including location lookup"""
        self.dbs = DBSReader(self.endpoint)
        blocks = self.dbs.getFileBlocksInfo(DATASET)
        block = self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK)
        self.assertEqual(1, len(block))
        block = block[0]
        self.assertEqual(46, len(blocks))
        self.assertIn(block["Name"], [x["Name"] for x in blocks])
        self.assertEqual(BLOCK, block["Name"])
        # self.assertEqual(377, block['NumberOfEvents'])
        self.assertEqual(150780132, block["BlockSize"])
        self.assertEqual(2, block["NumberOfFiles"])
        # possibly fragile but assume block located at least at cern
        sites = [x["Name"] for x in block["StorageElementList"] if "cern.ch" in x["Name"]]
        self.assertTrue(sites)

        # weird error handling - depends on whether block or dataset is missing
        self.assertRaises(DBSReaderError, self.dbs.getFileBlocksInfo, DATASET + "blah")
        self.assertFalse(self.dbs.getFileBlocksInfo(DATASET, blockName=BLOCK + "asas"))

    @attr("integration")
    def testListFileBlocks(self):
        """listFileBlocks returns block names in dataset"""
        self.dbs = DBSReader(self.endpoint)
        # The dataset-wide listing must contain the reference block.
        blocks = self.dbs.listFileBlocks(DATASET)
        self.assertIn(BLOCK, blocks)
        # block is closed
        block = self.dbs.listFileBlocks(DATASET, blockName=BLOCK, onlyClosedBlocks=True)[0]
        self.assertEqual(block, BLOCK)

    @attr("integration")
    def testListOpenFileBlocks(self):
        """listOpenFileBlocks finds open blocks"""
        # hard to find a dataset with open blocks, so don't bother
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.listOpenFileBlocks(DATASET))

    @attr("integration")
    def testBlockExists(self):
        """blockExists returns existence of blocks"""
        self.dbs = DBSReader(self.endpoint)
        self.assertTrue(self.dbs.blockExists(BLOCK))
        self.assertFalse(self.dbs.blockExists(DATASET + "#somethingelse"))

    @attr("integration")
    def testListFilesInBlock(self):
        """listFilesInBlock returns files in block"""
        self.dbs = DBSReader(self.endpoint)
        lfns = [x["LogicalFileName"] for x in self.dbs.listFilesInBlock(BLOCK)]
        self.assertIn(FILE, lfns)
        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlock, DATASET + "#blah")

    @attr("integration")
    def testListFilesInBlockWithParents(self):
        """listFilesInBlockWithParents gets files with parents for a block"""
        # hope PromptReco doesn't get deleted
        recoBlock = "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.listFilesInBlockWithParents(recoBlock)
        self.assertEqual(1, len(files))
        self.assertEqual(recoBlock, files[0]["Block"]["Name"])
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            files[0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.listFilesInBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testLfnsInBlock(self):
        """lfnsInBlock returns lfns in block"""
        self.dbs = DBSReader(self.endpoint)
        self.assertIn(FILE, self.dbs.lfnsInBlock(BLOCK))
        self.assertRaises(DBSReaderError, self.dbs.lfnsInBlock, BLOCK + "asas")

    @attr("integration")
    def testListFileBlockLocation(self):
        """listFileBlockLocation returns block location"""
        self.dbs = DBSReader(self.endpoint)
        # assume one site is cern
        sites = [x for x in self.dbs.listFileBlockLocation(BLOCK) if "cern.ch" in x]
        self.assertTrue(sites)
        # doesn't raise on non-existant block
        self.assertFalse(self.dbs.listFileBlockLocation(BLOCK + "blah"))

    @attr("integration")
    def testGetFileBlock(self):
        """getFileBlock returns block"""
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlock(BLOCK)
        self.assertEqual(len(block), 1)
        block = block[BLOCK]
        self.assertEqual(2, len(block["Files"]))

        self.assertRaises(DBSReaderError, self.dbs.getFileBlock, BLOCK + "asas")

    @attr("integration")
    def testGetFileBlockWithParents(self):
        """getFileBlockWithParents returns block and parents"""
        recoBlock = "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        self.dbs = DBSReader(self.endpoint)
        block = self.dbs.getFileBlockWithParents(recoBlock)
        self.assertEqual(len(block), 1)
        block = block[recoBlock]
        self.assertEqual(
            "/store/data/Run2011A/Jet/RAW/v1/000/160/433/24B46223-0D4E-E011-B573-0030487C778E.root",
            block["Files"][0]["ParentList"][0]["LogicalFileName"],
        )

        self.assertRaises(DBSReaderError, self.dbs.getFileBlockWithParents, BLOCK + "asas")

    @attr("integration")
    def testGetFiles(self):
        """getFiles returns files in dataset"""
        self.dbs = DBSReader(self.endpoint)
        files = self.dbs.getFiles(DATASET)
        self.assertEqual(len(files), 46)

    @attr("integration")
    def testListBlockParents(self):
        """listBlockParents returns block parents"""
        recoBlock = "/Jet/Run2011A-PromptReco-v1/RECO#f8d36af3-4fb6-11e0-9d39-00151755cb60"
        self.dbs = DBSReader(self.endpoint)
        parents = self.dbs.listBlockParents(recoBlock)
        self.assertEqual(1, len(parents))
        self.assertEqual("/Jet/Run2011A-v1/RAW#37cf2a40-4e0e-11e0-9833-00151755cb60", parents[0]["Name"])
        sites = [x for x in parents[0]["StorageElementList"] if "cern.ch" in x]
        self.assertTrue(sites)

        # An unknown block name yields an empty result rather than an error.
        self.assertFalse(self.dbs.listBlockParents(recoBlock + "dsl"))

    @attr("integration")
    def testBlockIsOpen(self):
        """blockIsOpen checks if a block is open"""
        self.dbs = DBSReader(self.endpoint)
        self.assertFalse(self.dbs.blockIsOpen(BLOCK))

    @attr("integration")
    def testBlockToDatasetPath(self):
        """blockToDatasetPath extracts path from block name"""
        self.dbs = DBSReader(self.endpoint)
        self.assertEqual(self.dbs.blockToDatasetPath(BLOCK), DATASET)
        self.assertFalse(self.dbs.blockToDatasetPath(BLOCK + "asas"))