def testMoreEvents(self):
    """
    _testMoreEvents_

    Test event based job splitting when the number of events per job is
    greater than the number of events in the input file.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.singleFileSubscription)

    jobGroups = jobFactory(events_per_job=1000, performance=self.performanceParams)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory created %s jobs not one" % len(jobGroups[0].jobs))

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                     "ERROR: Job contains unknown files.")
    # The file has fewer events than events_per_job, so no mask limits are set.
    self.assertIsNone(job["mask"].getMaxEvents(),
                      "ERROR: Job's max events is incorrect.")
    self.assertIsNone(job["mask"]["FirstEvent"],
                      "ERROR: Job's first event is incorrect.")
    return
def testHardLimitSplittingOnly(self):
    """
    _testHardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Build a fileset of three files, each carrying one 1000-event lumi.
    bigLumiFileset = Fileset(name="FilesetA")
    for runNumber, lfn in enumerate(["/this/is/file1", "/this/is/file2", "/this/is/file3"]):
        bigLumiFileset.addFile(self.createFile(lfn, 1000, runNumber, 1, "somese.cern.ch"))

    bigLumiSubscription = Subscription(fileset=bigLumiFileset,
                                       workflow=self.testWorkflow,
                                       split_algo="EventAwareLumiByWork",
                                       type="Processing")
    factory = splitter(package="WMCore.DataStructs", subscription=bigLumiSubscription)

    # Fail single lumis with more than 800 events and put 550 events per job
    groups = factory(halt_job_on_file_boundaries=True,
                     splitOnRun=True,
                     events_per_job=550,
                     job_time_limit=9600,
                     performance=self.performanceParams)

    self.assertEqual(len(groups), 1)
    createdJobs = groups[0].jobs
    self.assertEqual(len(createdJobs), 3)
    # Every job must be flagged as failed on creation with the expected reason.
    for createdJob in createdJobs:
        self.assertTrue(createdJob['failedOnCreation'])
        self.assertIn(' with too many events 1000 and it woud take 12000 sec to run',
                      createdJob['failedReason'])
    return
def testAllAcquired(self):
    """
    _testAllAcquired_

    Acquire every available file in each subscription and verify that
    the splitter then returns no job groups for any of them.
    """
    splitter = SplitterFactory()

    # Same check applied to all four fixture subscriptions; the loop
    # preserves the original order of the copy-pasted stanzas.
    subscriptions = [self.singleFileSubscription,
                     self.multipleFileSubscription,
                     self.multipleLumiSubscription,
                     self.singleLumiSubscription]
    for subscription in subscriptions:
        # With every file acquired there is nothing left to split.
        subscription.acquireFiles(subscription.availableFiles())
        jobFactory = splitter(subscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")
def __init__(self, **configDict):
    """
    init jobCreator
    """
    # Pick up the transaction and DB interface from the current thread.
    currentThread = threading.currentThread()
    self.transaction = currentThread.transaction

    # DAO factory for WMBS objects
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=logging,
                                 dbinterface=currentThread.dbi)

    # WMCore splitter factory for splitting up jobs.
    self.splitterFactory = SplitterFactory()

    # Minimal JobStateMachine configuration for the state changer below.
    config = Configuration()
    config.section_("JobStateMachine")
    config.JobStateMachine.couchurl = configDict["couchURL"]
    config.JobStateMachine.couch_retries = configDict["defaultRetries"]
    config.JobStateMachine.couchDBName = configDict["couchDBName"]
    self.config = config

    # Variables
    self.jobCacheDir = configDict['jobCacheDir']
    self.defaultJobType = configDict['defaultJobType']
    self.limit = configDict.get('fileLoadLimit', 500)

    self.createWorkArea = CreateWorkArea()
    self.changeState = ChangeState(self.config)
    return
def testE_getParents(self):
    """
    _getParents_

    Test the TwoFileBased version of this code
    """
    splitter = SplitterFactory()
    oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=oneSetSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           split_files_between_job=True,
                           include_parents=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        # BUGFIX: the original used assertTrue(len(...), 1), where the
        # second argument is only a failure message, so the check always
        # passed for any non-empty list.  assertEqual is the intended test.
        self.assertEqual(len(job['input_files']), 1)
        f = job['input_files'][0]
        self.assertEqual(len(f['parents']), 1)
        # Parent and child LFNs share the same prefix before the first '_'.
        self.assertEqual(f['lfn'].split('_')[0],
                         list(f['parents'])[0]['lfn'].split('_')[0])
    return
def testMoreEvents(self):
    """
    _testMoreEvents_

    Test event based job splitting when the number of events per job is
    greater than the number of events in the input file.

    Since the file has less events than the splitting, the job goes
    without a mask.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.singleFileSubscription)

    jobGroups = jobFactory(events_per_job=1000)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 1)

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"])
    # assertIsNone gives a clearer failure message than assertEqual(x, None).
    self.assertIsNone(job["mask"].getMaxEvents())
    self.assertIsNone(job["mask"]["FirstEvent"])
    return
def testMCEventSplitOver32bit(self):
    """
    _testMCEventSplitOver32bit_

    Make sure that no events will go over a 32 bit unsigned integer
    representation, event counter should be reset in that case. Also
    test is not over cautious.
    """
    # Start close enough to 2**32 that a full-size job would cross it.
    firstEvent = 3 * (2 ** 30) + 1
    singleMCSubscription = self.generateFakeMCFile(numEvents=2 ** 30,
                                                   firstEvent=firstEvent)
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=singleMCSubscription)

    jobGroups = jobFactory(events_per_job=2 ** 30 - 1,
                           events_per_lumi=2 ** 30 - 1)
    self.assertEqual(len(jobGroups), 1,
                     "Error: JobFactory did not return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "Error: JobFactory created %s jobs not two" % len(jobGroups[0].jobs))
    for job in jobGroups[0].jobs:
        mask = job["mask"]
        # First job: starts at firstEvent and must not exceed 2**32.
        matchesFirst = (mask.getMaxEvents() == 2 ** 30 - 1 and
                        mask["FirstLumi"] == 1 and
                        mask["FirstEvent"] == firstEvent and
                        mask["LastEvent"] <= 2 ** 32)
        # Second job: event counter reset, single leftover event in lumi 2.
        matchesSecond = (mask.getMaxEvents() == 1 and
                         mask["FirstLumi"] == 2 and
                         mask["FirstEvent"] == 1)
        self.assertTrue(matchesFirst or matchesSecond,
                        "Job mask: %s didn't pass neither of the conditions" % job["mask"])
def testSiteBlacklist(self):
    """
    _testSiteBlacklist_

    Same as testLocationSplit, but with a siteBlacklist for T2_CH_CERN.
    (do not allow jobs to run at T2_CH_CERN)
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleSiteSubscription)
    jobGroups = jobFactory(files_per_job=10,
                           siteBlacklist=["T2_CH_CERN"],
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 1)

    # With T2_CH_CERN blacklisted, both groups may only land on T1_US_FNAL.
    firstJob = jobGroups[0].jobs[0]
    self.assertEqual(firstJob["estimatedMemoryUsage"], 2300)
    self.assertEqual(firstJob["estimatedDiskUsage"], 100 * 400 * 5)
    self.assertEqual(firstJob["estimatedJobTime"], 100 * 12 * 5)
    self.assertEqual(firstJob["possiblePSN"], set(["T1_US_FNAL"]))

    secondJob = jobGroups[1].jobs[0]
    self.assertEqual(len(secondJob.getFiles()), 5)
    self.assertEqual(secondJob["estimatedMemoryUsage"], 2300)
    self.assertEqual(secondJob["estimatedDiskUsage"], 100 * 400 * 5)
    self.assertEqual(secondJob["estimatedJobTime"], 100 * 12 * 5)
    self.assertEqual(secondJob["possiblePSN"], set(["T1_US_FNAL"]))
    return
def testTrustSiteLists(self):
    """
    _testTrustSiteLists_

    Test trustSitelists splitting parameter to ignore job input file
    location and use siteWhitelist and siteBlacklist instead.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleSiteSubscription)

    jobGroups = jobFactory(files_per_job=10,
                           siteWhitelist=["T2_AA_AAA", "T2_BB_BBB", "T2_CC_CCC", "T2_DD_DDD"],
                           siteBlacklist=["T2_BB_BBB", "T2_DD_DDD", "T2_EE_EEE"],
                           trustSitelists=True,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 1)

    onlyJob = jobGroups[0].jobs[0]
    self.assertEqual(len(onlyJob.getFiles()), 10)
    self.assertEqual(onlyJob["estimatedMemoryUsage"], 2300)
    self.assertEqual(onlyJob["estimatedDiskUsage"], 100 * 400 * 10)
    self.assertEqual(onlyJob["estimatedJobTime"], 100 * 12 * 10)
    # Whitelist minus blacklist leaves only T2_AA_AAA and T2_CC_CCC.
    self.assertEqual(onlyJob["possiblePSN"], set(["T2_AA_AAA", "T2_CC_CCC"]))
    return
def testMC50EventSplit(self):
    """
    _testMC50EventSplit_

    Test event based job splitting when the number of events per job is
    50, this should result in two jobs.

    No lumi information supplied here.
    """
    singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=2)
    splitter = SplitterFactory()
    jobFactory = splitter(singleMCSubscription)

    jobGroups = jobFactory(events_per_job=50, performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1,
                     "Error: JobFactory did not return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "Error: JobFactory created %s jobs not two" % len(jobGroups[0].jobs))
    for job in jobGroups[0].jobs:
        mask = job["mask"]
        # One job per lumi: lumi 1 starting at event 1, lumi 2 at event 51.
        matchesFirst = (mask.getMaxEvents() == 50 and
                        mask["FirstLumi"] == 1 and
                        mask["FirstEvent"] == 1)
        matchesSecond = (mask.getMaxEvents() == 50 and
                         mask["FirstLumi"] == 2 and
                         mask["FirstEvent"] == 51)
        self.assertTrue(matchesFirst or matchesSecond,
                        "Job mask: %s didn't pass neither of the conditions" % job["mask"])
        self.assertFalse(job.getBaggage().lheInputFiles)
    return
def testLocationSplit(self):
    """
    _testLocationSplit_

    This should test whether or not the FileBased algorithm understands
    that files at seperate sites cannot be in the same jobGroup (this is
    the current standard).
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleSiteSubscription)
    jobGroups = jobFactory(files_per_job=10, performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 1)

    # First group is runnable at both sites, second only at FNAL.
    firstJob = jobGroups[0].jobs[0]
    self.assertEqual(firstJob["estimatedMemoryUsage"], 2300)
    self.assertEqual(firstJob["estimatedDiskUsage"], 100 * 400 * 5)
    self.assertEqual(firstJob["estimatedJobTime"], 100 * 12 * 5)
    self.assertEqual(firstJob["possiblePSN"], set(["T2_CH_CERN", "T1_US_FNAL"]))

    secondJob = jobGroups[1].jobs[0]
    self.assertEqual(len(secondJob.getFiles()), 5)
    self.assertEqual(secondJob["estimatedMemoryUsage"], 2300)
    self.assertEqual(secondJob["estimatedDiskUsage"], 100 * 400 * 5)
    self.assertEqual(secondJob["estimatedJobTime"], 100 * 12 * 5)
    self.assertEqual(secondJob["possiblePSN"], set(["T1_US_FNAL"]))
    return
def testMCMoreEvents(self):
    """
    _testMCMoreEvents_

    Test event based job splitting when the number of events per job is
    greater than the number of events in the input file and no lumi
    information was supplied.
    """
    singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=1)
    splitter = SplitterFactory()
    jobFactory = splitter(singleMCSubscription)

    jobGroups = jobFactory(events_per_job=1000, performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1,
                     "Error: JobFactory did not return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "Error: JobFactory created %s jobs not one" % len(jobGroups[0].jobs))

    onlyJob = jobGroups[0].jobs.pop()
    self.assertEqual(onlyJob.getFiles(type="lfn"), ["MCFakeFileTest"],
                     "Error: Job contains unknown files.")
    # Expected mask: starts at event/lumi 1, capped at 100 events,
    # with no run/lumi list attached.
    self.assertEqual(onlyJob["mask"].getMaxEvents(), 100,
                     "Error: Job's max events is incorrect.")
    self.assertEqual(onlyJob["mask"]["FirstEvent"], 1,
                     "Error: Job's first event is incorrect.")
    self.assertEqual(onlyJob["mask"]["FirstLumi"], 1,
                     "Error: Job's first lumi is incorrect.")
    self.assertEqual(len(onlyJob["mask"].getRunAndLumis()), 0,
                     "Error: Job's mask has runs and lumis")
def test150EventMultipleFileSplit(self):
    """
    _test150EventMultipleFileSplit_

    Test job splitting into 150 event jobs when the input subscription
    has more than one file available.  This test verifies that the job
    splitting code will put at most one file in a job.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.multipleFileSubscription)

    jobGroups = jobFactory(events_per_job=150, performance=self.performanceParams)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 10,
                     "ERROR: JobFactory created %s jobs not ten" % len(jobGroups[0].jobs))

    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles(type="lfn")), 1,
                         "ERROR: Job contains too many files.")
        # Each whole file fits in one job, so no mask boundaries are set.
        self.assertIsNone(job["mask"].getMaxEvents(),
                          "ERROR: Job's max events is incorrect.")
        self.assertIsNone(job["mask"]["FirstEvent"],
                          "ERROR: Job's first event is incorrect.")
def test99EventSplit(self):
    """
    _test99EventSplit_

    Test event based job splitting when the number of events per job is
    99, this should result in two jobs.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.singleFileSubscription)

    jobGroups = jobFactory(events_per_job=99, performance=self.performanceParams)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory created %s jobs not two" % len(jobGroups[0].jobs))

    firstEvents = []
    for job in jobGroups[0].jobs:
        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                         "ERROR: Job contains unknown files.")
        # First job is capped at 99 events; the trailing job is uncapped.
        self.assertTrue(job["mask"].getMaxEvents() == 99 or
                        job['mask'].getMaxEvents() is None,
                        "ERROR: Job's max events is incorrect.")
        self.assertIn(job["mask"]["FirstEvent"], [0, 99],
                      "ERROR: Job's first event is incorrect.")
        self.assertNotIn(job["mask"]["FirstEvent"], firstEvents,
                         "ERROR: Job's first event is repeated.")
        firstEvents.append(job["mask"]["FirstEvent"])
    return
def testMultipleRunsCombine(self):
    """
    _testMultipleRunsCombine_

    Test run based job splitting when the number of jobs is less then
    the number of files, with multiple files
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleRunSubscription)

    jobGroups = jobFactory(files_per_job=2)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 4,
                     "ERROR: JobFactory didn't return one JobGroup per run.")
    self.assertEqual(len(jobGroups[1].jobs), 2,
                     "ERROR: JobFactory didn't put only one job in the first job")

    # Last one in the queue should have one job, previous two (three files per run)
    self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 1)
    self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 2)
    return
def test_getParents(self):
    """
    _getParents_

    Check that we can do the same as the TwoFileBased
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)
    jobGroups = jobFactory(files_per_job=2,
                           include_parents=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 5)

    # Collect all LFNs; the original loop shadowed the builtin "file".
    fileList = []
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles()), 2)
        fileList.extend(job.getFiles(type="lfn"))
    self.assertEqual(len(fileList), 10)

    for j in jobGroups[0].jobs:
        for f in j['input_files']:
            self.assertEqual(len(f['parents']), 1)
            self.assertEqual(list(f['parents'])[0]['lfn'], '/parent/lfn/')
    return
def testExactEvents(self):
    """
    _testExactEvents_

    Test event based job splitting when the number of events per job is
    exactly the same as the number of events in the input file.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.singleFileSubscription)

    jobGroups = jobFactory(events_per_job=100)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 1)

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"])
    # The file fits exactly in one job, so no max-events cap is set.
    # assertIsNone is clearer than assertEqual(x, None).
    self.assertIsNone(job["mask"].getMaxEvents())
    self.assertEqual(job["mask"]["FirstEvent"], 0)
    return
def testMoreFiles(self):
    """
    _testMoreFiles_

    Test file based job splitting when the number of files per job is
    greater than the number of files in the input fileset.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.singleFileSubscription)

    jobGroups = jobFactory(files_per_job=10, performance=self.performanceParams)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create a single job.")

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                     "ERROR: Job contains unknown files.")
    return
def test100EventMultipleSite(self):
    """
    _test100EventMultipleSite_

    Test job splitting into 100 event jobs when the input subscription
    has more than one file available, at different site combinations.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleSiteSubscription)

    jobGroups = jobFactory(events_per_job=100)

    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 5)
    self.assertEqual(len(jobGroups[1].jobs), 5)

    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles(type="lfn")), 1)
        # A whole 100-event file fits one job, so the mask carries no cap.
        self.assertIsNone(job["mask"].getMaxEvents())
        # Plain assert is stripped under "python -O"; use unittest asserts.
        self.assertEqual(job["mask"]["FirstEvent"], 0,
                         "ERROR: Job's first event is incorrect.")
    return
def test2FileSplit(self):
    """
    _test2FileSplit_

    Test file based job splitting when the number of files per job is 2,
    this should result in five jobs.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.multipleFileSubscription)

    jobGroups = jobFactory(files_per_job=2, performance=self.performanceParams)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    # The original failure message said "two jobs" while checking for 5.
    self.assertEqual(len(jobGroups[0].jobs), 5,
                     "ERROR: JobFactory didn't create five jobs.")

    fileSet = set()
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles(type="set")), 2,
                         "ERROR: Job contains incorrect number of files.")
        for lfn in job.getFiles(type="lfn"):
            fileSet.add(lfn)
    self.assertEqual(len(fileSet), 10,
                     "ERROR: Not all files assigned to job.")
    return
def test_addParents(self):
    """
    _addParents_

    Test our ability to add parents to a job
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)
    jobGroups = jobFactory(events_per_job=50, include_parents=True)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 20)
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles(type="lfn")), 1)
        mask = job["mask"]
        # Each file is split in two: a capped first half starting at
        # event 0, and an uncapped second half starting at event 50.
        firstHalf = (mask.getMaxEvents() == 50 and mask["FirstEvent"] == 0)
        secondHalf = (mask.getMaxEvents() is None and mask["FirstEvent"] == 50)
        self.assertTrue(firstHalf or secondHalf)
        for inputFile in job['input_files']:
            self.assertEqual(len(inputFile['parents']), 1)
            self.assertEqual(list(inputFile['parents'])[0]['lfn'], '/parent/lfn/')
    return
def test3FileSplit(self):
    """
    _test3FileSplit_

    Test file based job splitting when the number of files per job is 3,
    this should result in four jobs.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.multipleFileSubscription)

    jobGroups = jobFactory(files_per_job=3, performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 4)

    fileList = []
    for job in jobGroups[0].jobs:
        # Plain asserts are stripped under "python -O"; use unittest asserts.
        # Three jobs take 3 files each; the leftover job takes 1.
        self.assertIn(len(job.getFiles(type="list")), [3, 1],
                      "ERROR: Job contains incorrect number of files.")
        for lfn in job.getFiles(type="lfn"):
            self.assertNotIn(lfn, fileList, "ERROR: File duplicated!")
            fileList.append(lfn)
    self.assertEqual(len(fileList), 10)
    return
def testExactEvents(self):
    """
    _testExactEvents_

    Test event based job splitting when the number of events per job is
    exactly the same as the number of events in the input file.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.singleFileSubscription)

    jobGroups = jobFactory(size_per_job=1000)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create a single job.")

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                     "ERROR: Job contains unknown files.")
    return
def test4WithLumiMask(self):
    """
    _test4WithLumiMask_

    Test file based job splitting when a run/lumi mask restricts which
    files are usable (only 3 of the files pass the mask).
    """
    splitter = SplitterFactory()
    jobFactory = splitter(self.multipleFileSubscription)

    jobGroups = jobFactory(files_per_job=2,
                           total_files=3,
                           runs=['1', '2', '4', '5'],
                           lumis=['100,130', '203,204,207,221', '401,405', '500, 520'],
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 2)

    fileList = []
    for job in jobGroups[0].jobs:
        # Plain asserts are stripped under "python -O"; use unittest asserts.
        self.assertIn(len(job.getFiles(type="list")), [2, 1],
                      "ERROR: Job contains incorrect number of files.")
        for lfn in job.getFiles(type="lfn"):
            self.assertNotIn(lfn, fileList, "ERROR: File duplicated!")
            fileList.append(lfn)
    self.assertEqual(len(fileList), 3)
    return
def testF_RunWhitelist(self):
    """
    _runWhitelist_

    Apparently we're too stupid to do the runlist in the GoodRunlist
    where it would make sense.
    """
    splitter = SplitterFactory()
    oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=oneSetSubscription)

    jobGroups = jobFactory(lumis_per_job=10,
                           split_files_between_job=True,
                           runWhitelist=[1],
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 1)
    # Only run 1 survives the whitelist, leaving a single one-file job.
    onlyJob = jobGroups[0].jobs[0]
    self.assertEqual(len(onlyJob['input_files']), 1)
    self.assertEqual(len(onlyJob['input_files'][0]['runs']), 1)
    self.assertEqual(onlyJob['input_files'][0]['runs'][0].run, 1)
    return
def testFilesetCloseout2(self):
    """
    _testFilesetCloseout2_

    Verify that the fail orphan file code does not fail files that
    have failed for other workflows.
    """
    self.stuffWMBS()
    self.mergeFileset.markOpen(False)

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    # Get out all the good merge jobs out of the way.
    result = jobFactory(min_merge_size=1,
                        max_merge_size=999999999999,
                        max_merge_events=999999999)

    self.assertEqual(len(result), 1, "Error: Wrong number of job groups.")
    self.assertEqual(len(result[0].jobs), 2, "Error: Wrong number of jobs.")

    # Check the failed-file bookkeeping for this subscription.
    failedAction = self.daoFactory(classname="Subscriptions.GetFailedFiles")
    failedFiles = failedAction.execute(self.mergeSubscription["id"])
    self.assertEqual(len(failedFiles), 4,
                     "Error: Wrong number of failed files: %s" % failedFiles)
    return
def testLumiMaskAndWhitelist(self):
    """
    _testLumiMaskAndWhitelist_

    Test that we can use a lumi-mask to filter good runs/lumis.
    """
    splitter = SplitterFactory()

    # Create 3 files with 100 events per lumi:
    # - file1 with 1 run of 8 lumis
    # - file2 with 2 runs of 2 lumis each
    # - file3 with 1 run of 5 lumis
    fileA = File(lfn="/this/is/file1", size=1000, events=800)
    fileB = File(lfn="/this/is/file2", size=1000, events=400)
    fileC = File(lfn="/this/is/file3", size=1000, events=500)

    # Build the lumi ranges with range() instead of manual append loops:
    # run 1 -> lumis 10-17, run 2 -> 20-21, run 3 -> 30-31, run 4 -> 40-44.
    fileA.addRun(Run(1, *range(10, 18)))
    fileA.setLocation("somese.cern.ch")
    fileB.addRun(Run(2, *range(20, 22)))
    fileB.addRun(Run(3, *range(30, 32)))
    fileB.setLocation("somese.cern.ch")
    fileC.addRun(Run(4, *range(40, 45)))
    fileC.setLocation("somese.cern.ch")

    testFileset = Fileset(name='Fileset')
    testFileset.addFile(fileA)
    testFileset.addFile(fileB)
    testFileset.addFile(fileC)

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
    jobFactory = splitter(package="WMCore.DataStructs",
                          subscription=testSubscription)

    # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
    jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           events_per_job=850,
                           runs=['1', '2', '4'],
                           lumis=['10,14', '20,21', '40,41'],
                           runWhitelist=[1, 4],
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 1)
    # Run 2 is rejected by the runWhitelist, leaving only runs 1 and 4.
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 4: [[40, 41]]})
def testMoreRuns(self):
    """
    _testMoreRuns_

    Test run based job splitting when the number of runs per job is
    greater than the number of runs in the input file.
    """
    # NOTE: the docstring previously said _testMoreEvents_ (copy-paste).
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.singleFileSubscription)

    jobGroups = jobFactory(files_per_job=2)

    # Plain asserts are stripped under "python -O"; use unittest asserts.
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup.")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create a single job.")

    job = jobGroups[0].jobs.pop()
    self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                     "ERROR: Job contains unknown files.")
    return
def test2FileSplit(self):
    """
    _test2FileSplit_

    Test file based job splitting when the number of files per job is 2,
    this should result in five jobs.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)
    jobGroups = jobFactory(files_per_job=2, performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 5)

    seenLfns = []
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job.getFiles()), 2)
        seenLfns.extend(job.getFiles(type="lfn"))
        # Resource estimates scale with the two 100-event input files.
        self.assertEqual(job["estimatedMemoryUsage"], 2300)
        self.assertEqual(job["estimatedDiskUsage"], 400 * 100 * 2)
        self.assertEqual(job["estimatedJobTime"], 12 * 100 * 2)
    self.assertEqual(len(seenLfns), 10)
    return
def test100EventMultipleSite(self):
    """
    _test100EventMultipleSite_

    Test job splitting into 100 event jobs when the input subscription
    has more than one file available, at different site combinations.
    """
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleSiteSubscription)
    jobGroups = jobFactory(events_per_job=self.eventsPerJob,
                           performance=self.performanceParams)

    # One job group per site combination, five jobs each.
    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 5)
    self.assertEqual(len(jobGroups[1].jobs), 5)
    self.assertEqual(jobGroups[0].jobs[0]['possiblePSN'], set(['s1', 's2']))
    self.assertEqual(jobGroups[1].jobs[0]['possiblePSN'], set(['s1']))

    for singleJob in jobGroups[0].jobs:
        mask = singleJob["mask"]
        self.assertEqual(len(singleJob.getFiles(type="lfn")), 1)
        self.assertEqual(mask.getMaxEvents(), self.eventsPerJob)
        self.assertEqual(mask["FirstEvent"], 0)
        # Per-job resource estimates for a single 100-event file.
        self.assertEqual(singleJob["estimatedJobTime"], 100 * 12)
        self.assertEqual(singleJob["estimatedDiskUsage"], 400 * 100)
        self.assertEqual(singleJob["estimatedMemoryUsage"], 2300)
    return