class HarvestTest(unittest.TestCase): """ _HarvestTest_ Test for EndOfRun job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"]) self.splitterFactory = SplitterFactory(package = "WMCore.JobSplitting") myThread = threading.currentThread() self.myThread = myThread daoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) self.WMBSFactory = daoFactory config = self.getConfig() self.changer = ChangeState(config) myResourceControl = ResourceControl() myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE") myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE") myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2") self.fileset1 = Fileset(name = "TestFileset1") for file in range(11): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(1,*[1])) newFile.setLocation('SomeSE') self.fileset1.addFile(newFile) self.fileset1.create() workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset = self.fileset1, workflow = workflow1, split_algo = "Harvest", type = "Harvesting") self.subscription1.create() self.configFile = EmulatorSetup.setupWMAgentConfig() return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() EmulatorSetup.deleteConfig(self.configFile) return def getConfig(self): """ _getConfig_ """ config = self.testInit.getConfiguration() self.testInit.generateWorkDir(config) config.section_("CoreDatabase") config.CoreDatabase.connectUrl = os.getenv("DATABASE") config.CoreDatabase.socket = os.getenv("DBSOCK") # JobStateMachine config.component_('JobStateMachine') config.JobStateMachine.couchurl = os.getenv('COUCHURL', None) config.JobStateMachine.couchDBName = 'wmagent_jobdump' return config def 
finishJobs(self, jobGroups, subscription = None): """ _finishJobs_ """ if not subscription: subscription = self.subscription1 for f in subscription.acquiredFiles(): subscription.completeFiles(f) for jobGroup in jobGroups: self.changer.propagate(jobGroup.jobs, 'executing', 'created') self.changer.propagate(jobGroup.jobs, 'complete', 'executing') self.changer.propagate(jobGroup.jobs, 'success', 'complete') self.changer.propagate(jobGroup.jobs, 'cleanout', 'success') return def testHarvestEndOfRunTrigger(self): """ _testDQMHarvestEndOfRunTrigger_ Make sure that the basic splitting algo works, which is only, ExpressMerge is ALL done, fire a job against that fileset """ self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't") jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory() self.assertEqual(len(jobGroups), 0 , "We got 1 or more jobGroups with an open fileset and no periodic configuration") self.fileset1.markOpen(False) self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?") # We should also check if there are aqcuired files, if there are, there are jobs, # we don't want to fire another jobs while previous are running (output is integrating whatever input) # TODO : The above one we can do when all is done. 
Not priority jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory() self.assertEqual(len(jobGroups), 1 , "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed") return def testPeriodicTrigger(self): """ _testPeriodicTrigger_ """ self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here") # Test timeout (5s for this first test) # there should be no acquired files, if there are, shouldn't be a job #self.subscription1.acquireFiles(self.subscription1.availableFiles().pop()) jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 3) self.assertEqual(len(jobGroups), 1 , "Didn't created the first periodic job when there were acquired files") # For the whole thing to work, faking the first job finishing, and putting the files as complete self.finishJobs(jobGroups) # Adding more of files, so we have new stuff to process for file in range(12,24): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(1,*[1])) newFile.setLocation('SomeSE') self.fileset1.addFile(newFile) self.fileset1.commit() # Testing that it doesn't create a job unless the delay is past jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 0 , "Created one or more job, when there were non-acquired file and the period is not passed by") time.sleep(2) jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 1 , "Didn't created one or more job, and there weren't and the period is passed by") # Finishing out previous jobs self.finishJobs(jobGroups) # Adding more of files, so we have new stuff to process for file in 
range(26,36): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(1,*[1])) newFile.setLocation('SomeSE') self.fileset1.addFile(newFile) self.fileset1.commit() # Trying to create another job just afterwards, it shouldn't, because it should respect the configured delay jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 0 , "Created one or more job, there are new files, but the delay is not past") time.sleep(2) jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 1 , "Didn't created one or more job, there are new files and the delay is past") # Last check is whether the job gets all the files or not numFilesJob = jobGroups[0].jobs[0].getFiles() numFilesFileset = self.fileset1.getFiles() self.assertEqual(numFilesJob, numFilesFileset, "Job didn't got all the files") # Finishing out previous jobs self.finishJobs(jobGroups) # Adding files for the first location for file in range(38,48): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(1,*[1])) newFile.setLocation('SomeSE') self.fileset1.addFile(newFile) self.fileset1.commit() # Then another location for file in range(50,56): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(1,*[1])) newFile.setLocation('SomeSE3') self.fileset1.addFile(newFile) self.fileset1.commit() # We should have jobs in both locations time.sleep(2) jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups[0].getJobs()), 2 , "We didn't get 2 jobs for 2 locations") firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0] secondJobLocation = 
jobGroups[0].getJobs()[1].getFileLocations()[0] self.assertEqual(firstJobLocation, 'SomeSite', "First job location is not SomeSite") self.assertEqual(secondJobLocation, 'SomeSite2', "Second job location is not SomeSite2") self.finishJobs(jobGroups) for file in range(60,65): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(2,*[2])) newFile.setLocation('SomeSE3') self.fileset1.addFile(newFile) self.fileset1.commit() for file in range(70,75): newFile = File("/some/file/name%d" % file, size = 1000, events = 100) newFile.addRun(Run(3,*[3])) newFile.setLocation('SomeSE3') self.fileset1.addFile(newFile) self.fileset1.commit() time.sleep(2) jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) jobGroups = jobFactory(periodic_harvest_interval = 2) # This is one of the most "complicated" tests so worth to comment, 4 jobs should be created # 1 - all previous files from SomeSE and run = 1 (a lot, like ~45) # 2 - Few files from SomeSE3, Run = 1 # 3 - Few files from SomeSE3, Run = 2 # 4 - Few files from SomeSE3, Run = 3 self.assertEqual(len(jobGroups[0].getJobs()), 4 , "We didn't get 4 jobs for adding 2 different runs to SomeSE3") return def testMultipleRunHarvesting(self): """ _testMultipleRunHarvesting_ Add some files with multiple runs in each, make sure the jobs are created by location and run. Verify each job mask afterwards. Note that in this test run are splitted between sites, in real life that MUST NOT happen we still don't support that. 
""" multipleFilesFileset = Fileset(name = "TestFileset") newFile = File("/some/file/test1", size = 1000, events = 100) newFile.addRun(Run(1,*[1,3,4,5,6,7])) newFile.addRun(Run(2,*[1,2,4,5,6,7])) newFile.setLocation('SomeSE') multipleFilesFileset.addFile(newFile) newFile = File("/some/file/test2", size = 1000, events = 100) newFile.addRun(Run(1,*[2,8])) newFile.addRun(Run(2,*[3,8])) newFile.setLocation('SomeSE3') multipleFilesFileset.addFile(newFile) multipleFilesFileset.create() harvestingWorkflow = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow", task="Test") harvestingWorkflow.create() harvestSub = Subscription(fileset = multipleFilesFileset, workflow = harvestingWorkflow, split_algo = "Harvest", type = "Harvesting") harvestSub.create() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSub) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 1, "A single job group was not created") self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created") for job in jobGroups[0].getJobs(): runs = job['mask'].getRunAndLumis() self.assertEqual(len(runs), 1, "Job has more than one run configured") possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8]], 2 : [[1,2],[4,7],[3,3],[8,8]]} run = runs.keys()[0] for lumiPair in runs[run]: self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask") self.finishJobs(jobGroups, harvestSub) newFile = File("/some/file/test3", size = 1000, events = 100) newFile.addRun(Run(1,*range(9,15))) newFile.setLocation('SomeSE3') multipleFilesFileset.addFile(newFile) multipleFilesFileset.commit() time.sleep(2) jobGroups = jobFactory(periodic_harvest_interval = 2) self.assertEqual(len(jobGroups), 1, "A single job group was not created") self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created") for job in jobGroups[0].getJobs(): runs = job['mask'].getRunAndLumis() self.assertEqual(len(runs), 1, "Job has more 
than one run configured") possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]], 2 : [[1,2],[4,7],[3,3],[8,8]]} run = runs.keys()[0] for lumiPair in runs[run]: self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask") harvestingWorkflowSib = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflowSib", task="TestSib") harvestingWorkflowSib.create() harvestSubSib = Subscription(fileset = multipleFilesFileset, workflow = harvestingWorkflowSib, split_algo = "Harvest", type = "Harvesting") harvestSubSib.create() jobFactorySib = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSubSib) multipleFilesFileset.markOpen(False) jobGroups = jobFactorySib(periodic_harvest_sibling = True) self.assertEqual(len(jobGroups), 0, "A single job group was created") self.finishJobs(jobGroups, harvestSub) jobGroups = jobFactorySib(periodic_harvest_sibling = True) self.assertEqual(len(jobGroups), 1, "A single job group was not created") self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created") for job in jobGroups[0].getJobs(): runs = job['mask'].getRunAndLumis() self.assertEqual(len(runs), 1, "Job has more than one run configured") possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]], 2 : [[1,2],[4,7],[3,3],[8,8]]} run = runs.keys()[0] for lumiPair in runs[run]: self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")
class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_

    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_

        Install the T0 schema and insert the fixtures the tests rely on:
        one location with two PNNs, run 1 with lumis 1 to 5, stream A with
        its CMSSW version, one streamer file in lumi 4, and a Repack plus a
        RepackMerge subscription where the Repack workflow outputs into the
        fileset read by the RepackMerge subscription.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        # current_thread() is the supported name; currentThread() is a
        # deprecated alias of it.
        myThread = threading.current_thread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN') """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN2') """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'TIME' : int(time.time()),
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4, 5]:
            insertLumiDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                     transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        # TestFileset1 is loaded rather than created
        # NOTE(review): presumably InsertStreamFileset created it - confirm
        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1 = Subscription(fileset = self.fileset1,
                                          workflow = workflow1,
                                          split_algo = "Repack",
                                          type = "Repack")
        self.subscription2 = Subscription(fileset = self.fileset2,
                                          workflow = workflow2,
                                          split_algo = "RepackMerge",
                                          type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        # Declare TestFileset2 as the output fileset of the Repack workflow
        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id), transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return

    def tearDown(self):
        """
        _tearDown_

        Clear the database after each test.
        """
        self.testInit.clearDatabase()

        return

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        Remove every row from the lumi_section_split_active table.
        """
        myThread = threading.current_thread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active """, transaction = False)

        return

    def _addFile(self, lumi, size = 1000, events = 100):
        """
        __addFile_

        Create one file with a random UUID as its name, belonging to run 1
        and the given lumi and located at SomePNN, and add it to
        self.fileset2. The fileset is not committed here; callers commit once
        all their files have been added.
        """
        newFile = File(makeUUID(), size = size, events = events)
        newFile.addRun(Run(1, *[lumi]))
        newFile.setLocation("SomePNN", immediateSave = False)
        newFile.create()
        self.fileset2.addFile(newFile)

        return

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi (2 files), followed by over-large lumi (4 files)

        expect three jobs, processing 2, 3 and 1 files
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2 * lumi):
                self._addFile(lumi, size = 1000 * lumi * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return

    def test01(self):
        """
        _test01_

        Test max size threshold for single lumi

        small lumi, followed by large lumi

        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2):
                self._addFile(lumi, size = 1000 * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test02(self):
        """
        _test02_

        Test max event threshold for single lumi

        small lumi, followed by large lumi

        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2):
                self._addFile(lumi, events = 100 * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi

        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(lumi * 2):
                self._addFile(lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test04(self):
        """
        _test04_

        Test max size threshold for multi lumi

        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for _ in range(2):
                self._addFile(lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # Once the fileset is closed the two leftover files get their own job
        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test05(self):
        """
        _test05_

        Test max event threshold for multi lumi

        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for _ in range(2):
                self._addFile(lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # Once the fileset is closed the two leftover files get their own job
        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for _ in range(2):
                self._addFile(lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        # Once the fileset is closed the two leftover files get their own job
        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2):
                self._addFile(lumi, size = 1000 * lumi * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test08(self):
        """
        _test08_

        Test under merge (over merge size threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2):
                self._addFile(lumi, size = 1000 * lumi * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for _ in range(2):
                self._addFile(lumi, size = 1000 * lumi * lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for _ in range(2):
                self._addFile(lumi)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Close lumi 3 with no streamer files: still no job expected
        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Close lumi 4 with one streamer file and feed the streamers
        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Acquiring the Repack input files is not enough to release the merge
        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # Only once the Repack input files are completed is the merge job created
        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_

    Test for RepackMerge job splitter

    Every test injects files into ``self.fileset2`` (the input fileset of the
    RepackMerge subscription, ``self.subscription2``), runs the splitter with
    a modified copy of ``self.splitArgs`` and checks how many job groups,
    jobs and files per job come back.
    """

    def setUp(self):
        """
        _setUp_

        Install the T0 schema and seed it with one location (two PNNs),
        run 1 with lumis 1-5, stream "A", one streamer file in lumi 4, and
        a Repack subscription (fileset1) whose output fileset (fileset2)
        is the input of the RepackMerge subscription under test.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        # current_thread() is the non-deprecated spelling of currentThread()
        myThread = threading.current_thread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN') """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN2') """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={'RUN': 1,
                                    'HLTKEY': "someHLTKey"},
                             transaction=False)

        # lumi sections 1 to 5 of run 1
        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in range(1, 6):
            insertLumiDAO.execute(binds={'RUN': 1,
                                         'LUMI': lumi},
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"},
                                transaction=False)

        insertCMSSVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds={'VERSION': "CMSSW_4_2_7"},
                                     transaction=False)

        insertStreamCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds={'RUN': 1,
                                                   'STREAM': 'A',
                                                   'VERSION': "CMSSW_4_2_7"},
                                            transaction=False)

        # single streamer file, in lumi 4; test10 relies on it
        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds={'RUN': 1,
                                         'LUMI': 4,
                                         'STREAM': "A",
                                         'LFN': "/testLFN/A",
                                         'FILESIZE': 100,
                                         'EVENTS': 100,
                                         'TIME': int(time.time())},
                                  transaction=False)

        insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset2 = Fileset(name="TestFileset2")
        # fileset1 is loaded, not created: InsertStreamFileset above was
        # given its name
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow2 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription2 = Subscription(fileset=self.fileset2,
                                          workflow=workflow2,
                                          split_algo="RepackMerge",
                                          type="RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        # register fileset2 as an output fileset of workflow1
        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id),
                                 transaction=False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname="Tier0Feeder.FeedStreamers")
        self.acquireFilesDAO = wmbsDaoFactory(classname="Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

    def tearDown(self):
        """
        _tearDown_

        Wipe the database populated by setUp and the test.
        """
        self.testInit.clearDatabase()

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        Remove every row from lumi_section_split_active.
        """
        myThread = threading.current_thread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active """,
                                 transaction=False)

    def _injectFiles(self, lumiSpecs):
        """
        __injectFiles_

        Create files in run 1 at "SomePNN", add them to fileset2 and commit
        the fileset once at the end.

        lumiSpecs is an iterable of (lumi, fileCount, fileSize, fileEvents)
        tuples; fileCount files of the given size and event count are
        created for each lumi, in the order given.
        """
        for lumi, fileCount, fileSize, fileEvents in lumiSpecs:
            for _ in range(fileCount):
                newFile = File(makeUUID(), size=fileSize, events=fileEvents)
                newFile.addRun(Run(1, lumi))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

    def _mergeJobFactory(self):
        """
        __mergeJobFactory_

        Return a job factory bound to the RepackMerge subscription.
        """
        return self.splitterFactory(package="WMCore.WMBS",
                                    subscription=self.subscription2)

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test max edm size threshold for single lumi

        small lumi (2 files of 1000), followed by over-large lumi
        (4 files of 4000, above the maxEdmSize of 13000)

        expect three jobs in total, processing 2, 3 and 1 files
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2 * lumi, 1000 * lumi * lumi, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        # nothing happens with the default thresholds
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

    def test01(self):
        """
        _test01_

        Test max size threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000 * lumi, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test02(self):
        """
        _test02_

        Test max event threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000, 100 * lumi) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi (2 files), followed by large lumi (4 files)
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, lumi * 2, 1000, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

    def test04(self):
        """
        _test04_

        Test max size threshold for multi lumi

        3 same size lumis; the first two are merged once maxInputSize is
        lowered, the third only after the fileset is closed
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000, 100) for lumi in [1, 2, 3]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test05(self):
        """
        _test05_

        Test max event threshold for multi lumi

        3 same size lumis; the first two are merged once maxInputEvents is
        lowered, the third only after the fileset is closed
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000, 100) for lumi in [1, 2, 3]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis; the first two are merged once maxInputFiles is
        lowered, the third only after the fileset is closed
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000, 100) for lumi in [1, 2, 3]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        expect a single job processing all 4 files
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000 * lumi * lumi, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

    def test08(self):
        """
        _test08_

        Test under merge (over merge size threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        maxOverSize is lowered to 9500; expect one 2-file job before and
        one 2-file job after the fileset is closed
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000 * lumi * lumi, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        maxInputEvents is lowered to 300; expect one 2-file job before and
        one 2-file job after the fileset is closed
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000 * lumi * lumi, 100) for lumi in [1, 2]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        Files exist for lumis 1, 2 and 5. No job is expected until lumi 3
        is closed with zero files and the lumi 4 streamer file has been
        fed, acquired and completed on the Repack subscription.
        """
        mySplitArgs = self.splitArgs.copy()

        self._injectFiles([(lumi, 2, 1000, 100) for lumi in [1, 2, 5]])

        jobFactory = self._mergeJobFactory()

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # close lumi 3 with no files
        self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                                'LUMI': 3,
                                                'STREAM': "A",
                                                'FILECOUNT': 0,
                                                'INSERT_TIME': self.currentTime,
                                                'CLOSE_TIME': self.currentTime},
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        # close lumi 4 with one file and feed the streamer into fileset1
        self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                                'LUMI': 4,
                                                'STREAM': "A",
                                                'FILECOUNT': 1,
                                                'INSERT_TIME': self.currentTime,
                                                'CLOSE_TIME': self.currentTime},
                                         transaction=False)

        self.feedStreamersDAO.execute(transaction=False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type='id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type='id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")
class HarvestTest(unittest.TestCase):
    """
    _HarvestTest_

    Test for EndOfRun job splitter

    The subscription under test uses split_algo "Harvest". Tests cover the
    fileset-closed trigger, the periodic trigger, and by-run / multi-run
    splitting with run white- and blacklists.
    """

    def setUp(self):
        """
        _setUp_

        Install the WMBS schema, register three site/PNN combinations and
        create an open fileset of 11 files (run 1, lumi 1) at
        'T1_US_FNAL_Disk' with a Harvesting subscription on it.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        # current_thread() is the non-deprecated spelling of currentThread()
        myThread = threading.current_thread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        self._addNumberedFiles(range(11), 'T1_US_FNAL_Disk')
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

    def tearDown(self):
        """
        _tearDown_

        Wipe the database and remove the emulator configuration file.
        """
        self.testInit.clearDatabase()
        EmulatorSetup.deleteConfig(self.configFile)

    def getConfig(self):
        """
        _getConfig_

        Build the configuration handed to ChangeState: database settings
        come from the DATABASE and DBSOCK environment variables, the couch
        URL from COUCHURL.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = 'wmagent_jobdump'

        return config

    def finishJobs(self, jobGroups, subscription=None):
        """
        _finishJobs_

        Mark every acquired file of the subscription as completed and walk
        all jobs of the given job groups through
        created -> executing -> complete -> success -> cleanout.

        subscription defaults to self.subscription1.
        """
        if not subscription:
            subscription = self.subscription1
        for f in subscription.acquiredFiles():
            subscription.completeFiles(f)

        for jobGroup in jobGroups:
            self.changer.propagate(jobGroup.jobs, 'executing', 'created')
            self.changer.propagate(jobGroup.jobs, 'complete', 'executing')
            self.changer.propagate(jobGroup.jobs, 'success', 'complete')
            self.changer.propagate(jobGroup.jobs, 'cleanout', 'success')

    def _addNumberedFiles(self, fileNums, location, run=1, lumi=1):
        """
        __addNumberedFiles_

        Add one file "/some/file/name<N>" per entry of fileNums to
        self.fileset1, each with the given run/lumi and location.

        The fileset is neither created nor committed here; the caller
        decides which one applies.
        """
        for fileNum in fileNums:
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(run, lumi))
            newFile.setLocation(location)
            self.fileset1.addFile(newFile)

    def _assertJobMasksWithin(self, jobs, compactList):
        """
        __assertJobMasksWithin_

        Check that each job mask holds exactly one run and that every lumi
        of that run is contained in the LumiList built from compactList.
        """
        allowedLumis = LumiList(compactList=compactList)
        for job in jobs:
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            # dict.keys() is a non-subscriptable view on Python 3, so take
            # the single key through the iterator instead of keys()[0]
            run = next(iter(runs))
            for lumiPair in runs[run]:
                for lumi in range(lumiPair[0], lumiPair[1] + 1):
                    self.assertTrue((str(run), lumi) in allowedLumis,
                                    "All of %s not in %s" % (lumiPair, allowedLumis))

    def testHarvestEndOfRunTrigger(self):
        """
        _testHarvestEndOfRunTrigger_

        Make sure that the basic splitting algo works: no job while the
        fileset is open and no periodic interval is configured, one job
        group once the fileset has been closed.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)

        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 0,
                         "We got 1 or more jobGroups with an open fileset and no periodic configuration")

        self.fileset1.markOpen(False)
        self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?")

        # TODO: also check for acquired files. Acquired files mean jobs are
        # still running, and no further job should be fired while they are
        # (the output integrates whatever input there is). Not a priority.

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)

        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 1,
                         "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed")

    def testPeriodicTrigger(self):
        """
        _testPeriodicTrigger_

        With the fileset open, jobs must only be created once the
        periodic_harvest_interval has elapsed, and they must be split by
        location and by run.
        """
        self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here")

        # First pass: nothing has been acquired yet, so a job is expected
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=3)

        self.assertEqual(len(jobGroups), 1, "Didn't created the first periodic job when there were acquired files")

        # For the whole thing to work, fake the first job finishing and
        # mark its files as complete
        self.finishJobs(jobGroups)

        # Add more files, so we have new stuff to process
        self._addNumberedFiles(range(12, 24), 'T1_US_FNAL_Disk')
        self.fileset1.commit()

        # No job may be created unless the delay is past
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0,
                         "Created one or more job, when there were non-acquired file and the period is not passed by")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1,
                         "Didn't created one or more job, and there weren't and the period is passed by")

        # Finish out the previous jobs
        self.finishJobs(jobGroups)

        # Add more files, so we have new stuff to process
        self._addNumberedFiles(range(26, 36), 'T1_US_FNAL_Disk')
        self.fileset1.commit()

        # Trying again straight away must not create a job: the configured
        # delay has to be respected
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0, "Created one or more job, there are new files, but the delay is not past")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1, "Didn't created one or more job, there are new files and the delay is past")

        # Last check is whether the job gets all the files or not
        numFilesJob = jobGroups[0].jobs[0].getFiles()
        numFilesFileset = self.fileset1.getFiles()

        self.assertEqual(numFilesJob, numFilesFileset, "Job didn't got all the files")

        # Finish out the previous jobs
        self.finishJobs(jobGroups)

        # Add files for the first location
        self._addNumberedFiles(range(38, 48), 'T1_US_FNAL_Disk')
        self.fileset1.commit()

        # Then for another location
        self._addNumberedFiles(range(50, 56), 'T2_CH_CERN')
        self.fileset1.commit()

        # We should have jobs in both locations
        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups[0].getJobs()), 2, "We didn't get 2 jobs for 2 locations")

        firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0]
        secondJobLocation = jobGroups[0].getJobs()[1].getFileLocations()[0]

        self.assertEqual(firstJobLocation, 'T2_CH_CERN')
        self.assertEqual(secondJobLocation, 'T1_US_FNAL')

        self.finishJobs(jobGroups)

        # Two new runs, both at the second location
        self._addNumberedFiles(range(60, 65), 'T2_CH_CERN', run=2, lumi=2)
        self.fileset1.commit()

        self._addNumberedFiles(range(70, 75), 'T2_CH_CERN', run=3, lumi=3)
        self.fileset1.commit()

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        # This is one of the most "complicated" tests so worth a comment;
        # 4 jobs should be created:
        # 1 - all previous files at T1_US_FNAL_Disk, run = 1 (a lot, ~45)
        # 2 - a few files at T2_CH_CERN, run = 1
        # 3 - a few files at T2_CH_CERN, run = 2
        # 4 - a few files at T2_CH_CERN, run = 3
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "We didn't get 4 jobs for adding 2 different runs to SomeSE3")

    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test runs are split between sites; in real life
        that MUST NOT happen, it is still not supported.
        """
        multipleFilesFileset = Fileset(name="TestFileset")

        newFile = File("/some/file/test1", size=1000, events=100)
        newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
        newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
        newFile.setLocation('T1_US_FNAL_Disk')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size=1000, events=100)
        newFile.addRun(Run(1, *[2, 8]))
        newFile.addRun(Run(2, *[3, 8]))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="hufnagel",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                         "Four jobs were not created")

        self._assertJobMasksWithin(jobGroups[0].getJobs(),
                                   {1: [[1, 1], [3, 7], [2, 2], [8, 8]],
                                    2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

        self.finishJobs(jobGroups, harvestSub)

        # one more file, extending run 1 by lumis 9-14
        newFile = File("/some/file/test3", size=1000, events=100)
        newFile.addRun(Run(1, *range(9, 15)))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        self._assertJobMasksWithin(jobGroups[0].getJobs(),
                                   {1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                                    2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

        # a second ("sibling") harvesting subscription on the same fileset
        harvestingWorkflowSib = Workflow(spec="spec.xml",
                                         owner="hufnagel",
                                         name="TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib = Subscription(fileset=multipleFilesFileset,
                                     workflow=harvestingWorkflowSib,
                                     split_algo="Harvest",
                                     type="Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSubSib)

        multipleFilesFileset.markOpen(False)

        # no sibling job while the first subscription has unfinished files
        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        self._assertJobMasksWithin(jobGroups[0].getJobs(),
                                   {1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                                    2: [[1, 2], [4, 7], [3, 3], [8, 8]]})

    def testMultiRunHarvesting(self):
        """
        _testMultiRunHarvesting_

        Provided a fileset with a couple of files and different runs, create a
        single job for all the runs at a specific location, which also adds a
        baggage to the job (True) which is later on looked up by SetupCMSSWPSet.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(dqmHarvestUnit="multiRun")
        self.assertEqual(len(jobGroups), 1)

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 1)
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertTrue(getattr(baggage, "multiRun", False), "It's supposed to be a multiRun job")
                self.assertEqual(getattr(baggage, "runLimits", ""), "-1-6")

    def testByRunHarvesting(self):
        """
        _testByRunHarvesting_

        Provided the common fileset, split by run (the default): a single
        job group holding 6 jobs is expected, and the multiRun baggage
        must not be set on any of them.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True, "Fileset should be open!")

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory()
        self.assertEqual(len(jobGroups), 1, "Should have created 1 job group")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs")
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")

    def testByRunAndRunWhitelist(self):
        """
        _testByRunAndRunWhitelist_

        Create harvesting jobs by run for the runs provided in the RunWhitelist
        (runs 1 and 3: two jobs expected)
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 2)

    def testByRunAndRunBlacklist(self):
        """
        _testByRunAndRunBlacklist_

        Create harvesting jobs by run for the runs in the RunWhitelist
        (1 to 5) that are not in the RunBlacklist (1 and 3): three jobs
        expected
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 3)
class PeriodicTest(unittest.TestCase):
    """
    _PeriodicTest_

    Test for the Periodic job splitter.
    """

    def setUp(self):
        """
        _setUp_

        Install the WMBS schema, register two locations and create a single
        "Periodic" subscription on a fileset that starts out empty.  Files
        are added later, one at a time, through injectFile().
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", seName = "somese.cern.ch")
        locationAction.execute(siteName = "site2", seName = "otherse.cern.ch")

        self.testFileset = Fileset(name = "TestFileset1")
        self.testFileset.create()

        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test" )
        testWorkflow.create()

        self.testSubscription = Subscription(fileset = self.testFileset,
                                             workflow = testWorkflow,
                                             split_algo = "Periodic",
                                             type = "Processing")
        self.testSubscription.create()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def injectFile(self):
        """
        _injectFile_

        Inject a file into the periodic splitting input fileset.  The LFN is
        suffixed with the current timestamp so that successive calls produce
        different names.
        """
        testFile = File(lfn = "/this/is/a/lfn%s" % time.time(), size = 1000,
                        events = 100, locations = set(["somese.cern.ch"]))
        testFile.create()
        self.testFileset.addFile(testFile)
        self.testFileset.commit()
        return

    def verifyFiles(self, wmbsJob):
        """
        _verifyFiles_

        Verify that the input files for the job are the same as the files in
        the input fileset: every job file must be present in the fileset and
        no fileset file may be left over.
        """
        inputFiles = wmbsJob.getFiles()
        filesetFiles = self.testFileset.getFiles()

        for inputFile in inputFiles:
            self.assertTrue(inputFile in filesetFiles,
                            "ERROR: Unknown file: %s" % inputFile)
            # Tick the file off so that leftovers can be detected below.
            filesetFiles.remove(inputFile)

        self.assertEqual(len(filesetFiles), 0,
                         "ERROR: Not all files included in job.")
        return

    def _popSingleJob(self, jobGroups):
        """
        __popSingleJob_

        Check that exactly one job group holding exactly one job was
        returned, then remove that job from the group and return it.
        """
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: Jobgroup has wrong number of jobs: %s" % len(jobGroups[0].jobs))
        return jobGroups[0].jobs.pop()

    def _completeJob(self, changeStateDAO, wmbsJob):
        """
        __completeJob_

        Flag the job as having moved from "new" to "cleanout" and hand it to
        the Jobs.ChangeState DAO.
        """
        wmbsJob["state"] = "cleanout"
        wmbsJob["oldstate"] = "new"
        wmbsJob["couch_record"] = "somejive"
        wmbsJob["retry_count"] = 0
        changeStateDAO.execute([wmbsJob])
        return

    def testPeriodicSplitting(self):
        """
        _testPeriodicSplitting_

        Manipulate the splitting algorithm to test the corner cases.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.testSubscription)

        # First pass: no jobs exist.  The algorithm should create a job
        # containing all available files.
        self.injectFile()
        jobGroups = jobFactory(job_period = 99999999999)

        wmbsJob = self._popSingleJob(jobGroups)
        self.verifyFiles(wmbsJob)

        # Verify that no jobs are generated as the previously issued job has
        # not completed yet.
        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 1)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")

        # Complete the job so that the splitting algorithm will generate
        # another job.
        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")
        self._completeJob(changeStateDAO, wmbsJob)

        # Verify that no jobs will be generated if the period has not yet
        # expired.
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 999999999999)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")

        # Verify that a job will be generated if the period has expired.
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period = 1)

        self.verifyFiles(self._popSingleJob(jobGroups))

        # Verify that no jobs will be generated in the case that a periodic
        # job is still running and the fileset has been closed.
        self.testFileset.markOpen(False)
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period = 1)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))

        # Complete the outstanding job.
        # NOTE(review): wmbsJob is still the job from the first pass; the job
        # created by the second pass was popped above without keeping a
        # reference to it.  Confirm that this is the job meant here.
        self._completeJob(changeStateDAO, wmbsJob)

        # Verify that when the input fileset is closed and all periodic jobs
        # are complete a job will not be generated.
        self.injectFile()
        jobGroups = jobFactory(job_period = 99999999999)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: Wrong number of job groups returned: %s" % len(jobGroups))

        # Verify that after the final job is complete no more jobs are
        # generated.
        self._completeJob(changeStateDAO, wmbsJob)

        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 1)

        self.assertEqual(len(moreJobGroups), 0,
                         "ERROR: No jobgroups should be returned.")
        return