def loadData(self):
    """
    _loadData_

    Load all data that is associated with the jobgroup.  This includes
    loading all the subscription information, the output fileset
    information and all the jobs that are associated with the group.
    """
    existingTransaction = self.beginTransaction()

    # A negative id or a missing uid means the jobgroup row itself has
    # not been loaded yet, so pull in the basic information first.
    if self.id < 0 or self.uid is None:
        self.load()

    self.subscription.loadData()
    self.output.loadData()

    loadAction = self.daofactory(classname="JobGroup.LoadJobs")
    result = loadAction.execute(self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    # Rebuild the in-memory job lists from scratch before re-adding the
    # jobs that the database says belong to this group.
    self.jobs = []
    self.newjobs = []

    for jobID in result:
        newJob = Job(id=jobID["id"])
        newJob.loadData()
        self.add(newJob)

    # Move the freshly added jobs from newjobs into the committed list.
    WMJobGroup.commit(self)
    self.commitTransaction(existingTransaction)
    return
def loadData(self):
    """
    _loadData_

    Load all data that is associated with the jobgroup.  This includes
    loading all the subscription information, the output fileset
    information and all the jobs that are associated with the group.
    """
    existingTransaction = self.beginTransaction()

    # Load the basic jobgroup row first if it hasn't been loaded yet.
    if self.id < 0 or self.uid is None:
        self.load()

    self.subscription.loadData()
    self.output.loadData()

    loadAction = self.daofactory(classname="JobGroup.LoadJobs")
    result = loadAction.execute(self.id, conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    # Reset the in-memory job lists and repopulate them from the DB.
    self.jobs = []
    self.newjobs = []

    for jobID in result:
        newJob = Job(id=jobID["id"])
        newJob.loadData()
        self.add(newJob)

    # Promote the re-added jobs from newjobs into the committed list.
    WMJobGroup.commit(self)
    self.commitTransaction(existingTransaction)
    return
def testMask(self):
    """
    _testMask_

    Test the new mask setup: run/lumi ranges attached to a job's mask
    must survive a round trip through the database and drive
    filterRunLumisByMask correctly.
    """
    # Build the minimal WMBS hierarchy a job needs:
    # workflow -> fileset -> subscription -> jobgroup.
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Attach two run/lumi ranges to the mask before creating the job.
    testJob = Job()
    testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
    testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
    testJob.create(group=testJobGroup)

    # Reload the job from the database and verify the mask round-trips.
    loadJob = Job(id=testJob.exists())
    loadJob.loadData()

    runs = loadJob['mask'].getRunAndLumis()
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[100], [[101, 102]])
    self.assertEqual(runs[200], [[201, 202]])

    # Filtering should trim run 100 down to the masked lumis and drop
    # run 300 entirely (the mask has no entry for it).
    bigRun = Run(100, *[101, 102, 103, 104])
    badRun = Run(300, *[1001, 1002])
    result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(result), 1)

    alteredRun = result.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    # Two identical unmatched runs; the call must not blow up.  The
    # return value is intentionally ignored here.
    run0 = Run(300, *[1001, 1002])
    run1 = Run(300, *[1001, 1002])
    loadJob['mask'].filterRunLumisByMask([run0, run1])
    return
def testLoadData(self):
    """
    _testLoadData_

    Create a job and save it to the database.  Load it back from the
    database using the name and the id.  Verify that all job information
    is correct including input files and the job mask.
    """
    testJobA = self.createTestJob()

    # Populate every mask field so the reload can be checked field by field.
    testJobA["mask"]["FirstEvent"] = 1
    testJobA["mask"]["LastEvent"] = 2
    testJobA["mask"]["FirstLumi"] = 3
    testJobA["mask"]["LastLumi"] = 4
    testJobA["mask"]["FirstRun"] = 5
    testJobA["mask"]["LastRun"] = 6

    testJobA.save()

    testJobB = Job(id=testJobA["id"])
    testJobC = Job(name=testJobA["name"])
    testJobB.loadData()
    testJobC.loadData()

    # Per-attribute assertEqual gives a precise failure message, unlike
    # the single compound assert this replaces (which was also stripped
    # entirely when running under python -O).
    for loadedJob, loadedBy in ((testJobB, "ID"), (testJobC, "name")):
        for attr in ("id", "name", "jobgroup", "couch_record", "location"):
            self.assertEqual(testJobA[attr], loadedJob[attr],
                             "ERROR: Load from %s didn't load %s correctly" %
                             (loadedBy, attr))

    self.assertEqual(testJobA["mask"], testJobB["mask"],
                     "ERROR: Job mask did not load properly")
    self.assertEqual(testJobA["mask"], testJobC["mask"],
                     "ERROR: Job mask did not load properly")

    # Both loads must return exactly the same set of input files.
    goldenFiles = testJobA.getFiles()
    for testFile in testJobB.getFiles():
        self.assertIn(testFile, goldenFiles, "ERROR: Job loaded an unknown file")
        goldenFiles.remove(testFile)
    self.assertEqual(len(goldenFiles), 0, "ERROR: Job didn't load all files")

    goldenFiles = testJobA.getFiles()
    for testFile in testJobC.getFiles():
        self.assertIn(testFile, goldenFiles, "ERROR: Job loaded an unknown file")
        goldenFiles.remove(testFile)
    self.assertEqual(len(goldenFiles), 0, "ERROR: Job didn't load all files")
    return
def testGetFiles(self):
    """
    _testGetFiles_

    Test the Job's getFiles() method.  This should load the files from
    the database if they haven't been loaded already.
    """
    testJobA = self.createTestJob()

    testJobB = Job(id=testJobA["id"])
    testJobB.loadData()

    # unittest assertions replace bare asserts: they are not stripped
    # under python -O and report which file failed the membership check.
    goldenFiles = testJobA.getFiles()
    for testFile in testJobB.getFiles():
        self.assertIn(testFile, goldenFiles,
                      "ERROR: Job loaded an unknown file: %s" % testFile)
        goldenFiles.remove(testFile)

    self.assertEqual(len(goldenFiles), 0, "ERROR: Job didn't load all files")
    return
def testGetFiles(self):
    """
    _testGetFiles_

    Test the Job's getFiles() method.  This should load the files from
    the database if they haven't been loaded already.
    """
    originalJob = self.createTestJob()

    reloadedJob = Job(id=originalJob["id"])
    reloadedJob.loadData()

    # Drain the expected-file list as reloaded files are matched; any
    # leftovers mean the reload came up short.
    expectedFiles = originalJob.getFiles()
    for loadedFile in reloadedJob.getFiles():
        assert loadedFile in expectedFiles, \
            "ERROR: Job loaded an unknown file: %s" % loadedFile
        expectedFiles.remove(loadedFile)

    assert not expectedFiles, "ERROR: Job didn't load all files"
    return
def testSaveTransaction(self):
    """
    _testSaveTransaction_

    Create a job and a job mask and save them both to the database.  Load
    the job from the database and verify that everything was written
    correctly.  Begin a new transaction and update the job mask again.
    Load the mask and verify that it's correct.  Finally, rollback the
    transaction and reload the mask to verify that it is in the correct
    state.
    """
    testJobA = self.createTestJob()

    # Fill every mask field before the first (committed) save.
    testJobA["mask"]["FirstEvent"] = 1
    testJobA["mask"]["LastEvent"] = 2
    testJobA["mask"]["FirstLumi"] = 3
    testJobA["mask"]["LastLumi"] = 4
    testJobA["mask"]["FirstRun"] = 5
    testJobA["mask"]["LastRun"] = 6

    testJobA.save()

    testJobB = Job(id=testJobA["id"])
    testJobB.loadData()

    assert testJobA["mask"] == testJobB["mask"], "ERROR: Job mask did not load properly"

    # Open an explicit transaction and modify the mask plus several job
    # attributes; these changes are visible until the rollback below.
    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJobA["mask"]["FirstEvent"] = 7
    testJobA["mask"]["LastEvent"] = 8
    testJobA["mask"]["FirstLumi"] = 9
    testJobA["mask"]["LastLumi"] = 10
    testJobA["mask"]["FirstRun"] = 11
    testJobA["mask"]["LastRun"] = 12
    testJobA["name"] = "stevesJob"
    testJobA["couch_record"] = "someCouchRecord"
    testJobA["location"] = "test2.site.ch"

    testJobA.save()
    testJobC = Job(id=testJobA["id"])
    testJobC.loadData()

    assert testJobA["mask"] == testJobC["mask"], "ERROR: Job mask did not load properly"
    assert testJobC["name"] == "stevesJob", "ERROR: Job name did not save"
    assert testJobC["couch_record"] == "someCouchRecord", "ERROR: Job couch record did not save"
    assert testJobC["location"] == "test2.site.ch", "ERROR: Job site did not save"

    myThread.transaction.rollback()

    # After the rollback, the mask must match the state of the first save.
    testJobD = Job(id=testJobA["id"])
    testJobD.loadData()

    assert testJobB["mask"] == testJobD["mask"], "ERROR: Job mask did not load properly"
    return
def testLoadData(self):
    """
    _testLoadData_

    Create a job and save it to the database.  Load it back from the
    database using the name and the id.  Verify that all job information
    is correct including input files and the job mask.
    """
    testJobA = self.createTestJob()

    # Populate every mask field so the reload can be verified.
    testJobA["mask"]["FirstEvent"] = 1
    testJobA["mask"]["LastEvent"] = 2
    testJobA["mask"]["FirstLumi"] = 3
    testJobA["mask"]["LastLumi"] = 4
    testJobA["mask"]["FirstRun"] = 5
    testJobA["mask"]["LastRun"] = 6

    testJobA.save()

    # Load the same job twice: once by id, once by name.
    testJobB = Job(id=testJobA["id"])
    testJobC = Job(name=testJobA["name"])
    testJobB.loadData()
    testJobC.loadData()

    assert (testJobA["id"] == testJobB["id"]) and \
           (testJobA["name"] == testJobB["name"]) and \
           (testJobA["jobgroup"] == testJobB["jobgroup"]) and \
           (testJobA["couch_record"] == testJobB["couch_record"]) and \
           (testJobA["location"] == testJobB["location"]), \
        "ERROR: Load from ID didn't load everything correctly"

    assert (testJobA["id"] == testJobC["id"]) and \
           (testJobA["name"] == testJobC["name"]) and \
           (testJobA["jobgroup"] == testJobC["jobgroup"]) and \
           (testJobA["couch_record"] == testJobC["couch_record"]) and \
           (testJobA["location"] == testJobC["location"]), \
        "ERROR: Load from name didn't load everything correctly"

    assert testJobA["mask"] == testJobB["mask"], \
        "ERROR: Job mask did not load properly"
    assert testJobA["mask"] == testJobC["mask"], \
        "ERROR: Job mask did not load properly"

    # Both loads must return exactly the original set of input files:
    # drain the golden list and check nothing is left over.
    goldenFiles = testJobA.getFiles()
    for testFile in testJobB.getFiles():
        assert testFile in goldenFiles, \
            "ERROR: Job loaded an unknown file"
        goldenFiles.remove(testFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Job didn't load all files"

    goldenFiles = testJobA.getFiles()
    for testFile in testJobC.getFiles():
        assert testFile in goldenFiles, \
            "ERROR: Job loaded an unknown file"
        goldenFiles.remove(testFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Job didn't load all files"
    return
def verifyFileMetaData(self, jobID, fwkJobReportFiles):
    """
    _verifyFileMetaData_

    Verify that all the files that were output by a job made it into WMBS
    correctly.  Compare the contents of WMBS to the files in the
    frameworks job report.

    Note that fwkJobReportFiles is a list of DataStructs File objects.
    """
    testJob = Job(id=jobID)
    testJob.loadData()

    inputLFNs = []
    for inputFile in testJob["input_files"]:
        inputLFNs.append(inputFile["lfn"])

    for fwkJobReportFile in fwkJobReportFiles:
        outputFile = File(lfn=fwkJobReportFile["lfn"])
        outputFile.loadData(parentage=1)

        assert outputFile["events"] == int(fwkJobReportFile["events"]), \
            "Error: Output file has wrong events: %s, %s" % \
            (outputFile["events"], fwkJobReportFile["events"])
        assert outputFile["size"] == int(fwkJobReportFile["size"]), \
            "Error: Output file has wrong size: %s, %s" % \
            (outputFile["size"], fwkJobReportFile["size"])

        # Checksums must match key-for-key and neither side may have extras.
        for ckType in fwkJobReportFile["checksums"]:
            assert ckType in outputFile["checksums"], \
                "Error: Output file is missing checksums: %s" % ckType
            assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \
                "Error: Checksums don't match."

        assert len(fwkJobReportFile["checksums"]) == \
            len(outputFile["checksums"]), \
            "Error: Wrong number of checksums."

        jobType = self.getJobTypeAction.execute(jobID=jobID)
        if jobType == "Merge":
            assert str(outputFile["merged"]) == "True", \
                "Error: Merge jobs should output merged files."
        else:
            assert outputFile["merged"] == fwkJobReportFile["merged"], \
                "Error: Output file merged output is wrong: %s, %s" % \
                (outputFile["merged"], fwkJobReportFile["merged"])

        assert len(outputFile["locations"]) == 1, \
            "Error: outputfile should have one location: %s" % outputFile["locations"]
        assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \
            "Error: wrong location for file."

        assert len(outputFile["parents"]) == len(inputLFNs), \
            "Error: Output file has wrong number of parents."
        for outputParent in outputFile["parents"]:
            assert outputParent["lfn"] in inputLFNs, \
                "Error: Unknown parent file: %s" % outputParent["lfn"]

        # Build a run -> lumis map from the job report and drain it while
        # walking the WMBS file's runs; anything left over was missing
        # from WMBS, anything not found was extra.
        fwjrRuns = {}
        for run in fwkJobReportFile["runs"]:
            fwjrRuns[run.run] = run.lumis

        for run in outputFile["runs"]:
            assert run.run in fwjrRuns, \
                "Error: Extra run in output: %s" % run.run

            for lumi in run:
                assert lumi in fwjrRuns[run.run], \
                    "Error: Extra lumi: %s" % lumi
                fwjrRuns[run.run].remove(lumi)

            if len(fwjrRuns[run.run]) == 0:
                del fwjrRuns[run.run]

        assert len(fwjrRuns) == 0, \
            "Error: Missing runs, lumis: %s" % fwjrRuns

        testJobGroup = JobGroup(id=testJob["jobgroup"])
        testJobGroup.loadData()
        jobGroupFileset = testJobGroup.output
        jobGroupFileset.loadData()

        assert outputFile["id"] in jobGroupFileset.getFiles(type="id"), \
            "Error: output file not in jobgroup fileset."

        # Use "is None" for the singleton check (was "== None").
        if testJob["mask"]["FirstEvent"] is None:
            assert outputFile["first_event"] == 0, \
                "Error: first event not set correctly: 0, %s" % \
                outputFile["first_event"]
        else:
            assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \
                "Error: last event not set correctly: %s, %s" % \
                (testJob["mask"]["FirstEvent"], outputFile["first_event"])

    return
def verifyFileMetaData(self, jobID, fwkJobReportFiles):
    """
    _verifyFileMetaData_

    Verify that all the files that were output by a job made it into WMBS
    correctly.  Compare the contents of WMBS to the files in the
    frameworks job report.

    Note that fwkJobReportFiles is a list of DataStructs File objects.
    """
    testJob = Job(id=jobID)
    testJob.loadData()

    inputLFNs = []
    for inputFile in testJob["input_files"]:
        inputLFNs.append(inputFile["lfn"])

    for fwkJobReportFile in fwkJobReportFiles:
        outputFile = File(lfn=fwkJobReportFile["lfn"])
        outputFile.loadData(parentage=1)

        assert outputFile["events"] == int(fwkJobReportFile["events"]), \
            "Error: Output file has wrong events: %s, %s" % \
            (outputFile["events"], fwkJobReportFile["events"])
        assert outputFile["size"] == int(fwkJobReportFile["size"]), \
            "Error: Output file has wrong size: %s, %s" % \
            (outputFile["size"], fwkJobReportFile["size"])

        # Iterate the dicts directly instead of materializing .keys();
        # checksums must match key-for-key with no extras on either side.
        for ckType in fwkJobReportFile["checksums"]:
            assert ckType in outputFile["checksums"], \
                "Error: Output file is missing checksums: %s" % ckType
            assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \
                "Error: Checksums don't match."

        assert len(fwkJobReportFile["checksums"]) == \
            len(outputFile["checksums"]), \
            "Error: Wrong number of checksums."

        jobType = self.getJobTypeAction.execute(jobID=jobID)
        if jobType == "Merge":
            assert str(outputFile["merged"]) == "True", \
                "Error: Merge jobs should output merged files."
        else:
            assert outputFile["merged"] == fwkJobReportFile["merged"], \
                "Error: Output file merged output is wrong: %s, %s" % \
                (outputFile["merged"], fwkJobReportFile["merged"])

        assert len(outputFile["locations"]) == 1, \
            "Error: outputfile should have one location: %s" % outputFile["locations"]
        assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \
            "Error: wrong location for file."

        assert len(outputFile["parents"]) == len(inputLFNs), \
            "Error: Output file has wrong number of parents."
        for outputParent in outputFile["parents"]:
            assert outputParent["lfn"] in inputLFNs, \
                "Error: Unknown parent file: %s" % outputParent["lfn"]

        # Build a run -> lumis map from the job report and drain it while
        # walking the WMBS file's runs; anything left over was missing
        # from WMBS, anything not found was extra.
        fwjrRuns = {}
        for run in fwkJobReportFile["runs"]:
            fwjrRuns[run.run] = run.lumis

        for run in outputFile["runs"]:
            assert run.run in fwjrRuns, \
                "Error: Extra run in output: %s" % run.run

            for lumi in run:
                assert lumi in fwjrRuns[run.run], \
                    "Error: Extra lumi: %s" % lumi
                fwjrRuns[run.run].remove(lumi)

            if len(fwjrRuns[run.run]) == 0:
                del fwjrRuns[run.run]

        assert len(fwjrRuns) == 0, \
            "Error: Missing runs, lumis: %s" % fwjrRuns

        testJobGroup = JobGroup(id=testJob["jobgroup"])
        testJobGroup.loadData()
        jobGroupFileset = testJobGroup.output
        jobGroupFileset.loadData()

        assert outputFile["id"] in jobGroupFileset.getFiles(type="id"), \
            "Error: output file not in jobgroup fileset."

        # Use "is None" for the singleton check (was "== None").
        if testJob["mask"]["FirstEvent"] is None:
            assert outputFile["first_event"] == 0, \
                "Error: first event not set correctly: 0, %s" % \
                outputFile["first_event"]
        else:
            assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \
                "Error: last event not set correctly: %s, %s" % \
                (testJob["mask"]["FirstEvent"], outputFile["first_event"])

    return
def testB_NoRunNoFileSplitting(self):
    """
    _NoRunNoFileSplitting_

    Test the splitting algorithm in the odder fringe cases that
    might be required.
    """
    splitter = SplitterFactory()
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 9)

    # The first job should have three lumis from one run
    # The second three lumis from two different runs
    # (plain int keys instead of Python-2-only long literals such as 0L;
    # the values compare equal and this keeps the test Python 3 ready)
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(),
                     {0: [[3, 4]], 1: [[100, 100]]})

    # And it should still be the same when you load it out of the database
    j1 = Job(id=jobs[1]['id'])
    j1.loadData()
    self.assertEqual(j1['mask'].getRunAndLumis(),
                     {0: [[3, 4]], 1: [[100, 100]]})

    # Assert that this works differently with file splitting on and run
    # splitting on
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=True,
                           splitOnRun=True)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 10)

    # In this case it should slice things up so that each job only has
    # one run in it.
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[3, 4]]})

    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=4,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=10,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False)
    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 2)
    self.assertEqual(jobs[0]['mask']['runAndLumis'],
                     {0: [[0, 3]], 1: [[100, 103]], 2: [[200, 201]]})
    self.assertEqual(jobs[1]['mask']['runAndLumis'],
                     {2: [[202, 203]], 3: [[300, 303]], 4: [[400, 403]]})

    j = Job(id=jobs[0]['id'])
    j.loadData()
    self.assertEqual(len(j['input_files']), 3)
    for f in j['input_files']:
        # Fixed: assertTrue(x, y) treats y as the failure message and
        # always passes for truthy x; the intent is an equality check.
        self.assertEqual(f['events'], 100)
        self.assertEqual(f['size'], 1000)
    return
def testB_NoRunNoFileSplitting(self):
    """
    _NoRunNoFileSplitting_

    Test the splitting algorithm in the odder fringe cases that
    might be required.
    """
    splitter = SplitterFactory()
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 9)

    # The first job should have three lumis from one run
    # The second three lumis from two different runs
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[0]['estimatedJobTime'], 60 * 12)
    self.assertEqual(jobs[0]['estimatedDiskUsage'], 60 * 400)
    self.assertEqual(jobs[0]['estimatedMemoryUsage'], 2300)

    job1runLumi = jobs[1]['mask'].getRunAndLumis()
    self.assertEqual(job1runLumi[0][0][0] + 1, job1runLumi[0][0][1])  # Run 0, startLumi+1 == endLumi
    self.assertEqual(job1runLumi[1][0][0], job1runLumi[1][0][1])  # Run 1, startLumi == endLumi
    self.assertEqual(jobs[1]['estimatedJobTime'], 60 * 12)
    self.assertEqual(jobs[1]['estimatedDiskUsage'], 60 * 400)
    self.assertEqual(jobs[1]['estimatedMemoryUsage'], 2300)

    # And it should still be the same when you load it out of the database
    j1 = Job(id=jobs[1]['id'])
    j1.loadData()
    self.assertEqual(j1['mask'].getRunAndLumis(),
                     {0: [[3, 4]], 1: [[100, 100]]})

    # Assert that this works differently with file splitting on and run
    # splitting on
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 10)

    # In this case it should slice things up so that each job only has
    # one run in it.
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[3, 4]]})

    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=4,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=10,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 2)
    self.assertEqual(jobs[0]['mask']['runAndLumis'],
                     {0: [[0, 3]], 1: [[100, 103]], 2: [[200, 201]]})
    self.assertEqual(jobs[1]['mask']['runAndLumis'],
                     {2: [[202, 203]], 3: [[300, 303]], 4: [[400, 403]]})

    j = Job(id=jobs[0]['id'])
    j.loadData()
    self.assertEqual(len(j['input_files']), 3)
    for f in j['input_files']:
        # Fixed: assertTrue(x, y) treats y as the failure message and
        # always passes for truthy x; the intent is an equality check.
        self.assertEqual(f['events'], 100)
        self.assertEqual(f['size'], 1000)
    return
def testA_FileSplitting(self):
    """
    _FileSplitting_

    Test that things work if we split files between jobs
    """
    splitter = SplitterFactory()

    oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=oneSetSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=True)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        # Fixed: assertTrue(len(...), 1) treated the 1 as a failure
        # message and always passed; the intent is an equality check.
        self.assertEqual(len(job['input_files']), 1)

    twoLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=2)
    jobFactory = splitter(package="WMCore.WMBS", subscription=twoLumiFiles)
    jobGroups = jobFactory(lumis_per_job=1,
                           halt_job_on_file_boundaries=True)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job['input_files']), 1)

    wholeLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=3)
    jobFactory = splitter(package="WMCore.WMBS", subscription=wholeLumiFiles)
    jobGroups = jobFactory(lumis_per_job=2,
                           halt_job_on_file_boundaries=True)
    self.assertEqual(len(jobGroups), 1)
    # 10 because we split on run boundaries
    self.assertEqual(len(jobGroups[0].jobs), 10)
    jobList = jobGroups[0].jobs
    for job in jobList:
        # Half should have one file, half two
        self.assertTrue(len(job['input_files']) in [1, 2])

    # Plain ints replace the Python-2-only long literals (0L etc.); the
    # values compare equal and this keeps the test Python 3 compatible.
    mask0 = jobList[0]['mask'].getRunAndLumis()
    self.assertEqual(mask0, {0: [[0, 1]]})
    mask1 = jobList[1]['mask'].getRunAndLumis()
    self.assertEqual(mask1, {0: [[2, 2]]})
    mask2 = jobList[2]['mask'].getRunAndLumis()
    self.assertEqual(mask2, {1: [[100, 101]]})
    mask3 = jobList[3]['mask'].getRunAndLumis()
    self.assertEqual(mask3, {1: [[102, 102]]})

    j0 = Job(id=jobList[0]['id'])
    j0.loadData()
    self.assertEqual(j0['mask'].getRunAndLumis(), {0: [[0, 1]]})

    # Do it with multiple sites
    twoSiteSubscription = self.createSubscription(nFiles=5, lumisPerFile=2,
                                                  twoSites=True)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=twoSiteSubscription)
    jobGroups = jobFactory(lumis_per_job=1,
                           halt_job_on_file_boundaries=True)
    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job['input_files']), 1)
def testSaveTransaction(self):
    """
    _testSaveTransaction_

    Create a job and a job mask and save them both to the database.  Load
    the job from the database and verify that everything was written
    correctly.  Begin a new transaction and update the job mask again.
    Load the mask and verify that it's correct.  Finally, rollback the
    transaction and reload the mask to verify that it is in the correct
    state.
    """
    testJobA = self.createTestJob()

    # Fill every mask field before the first (committed) save.
    testJobA["mask"]["FirstEvent"] = 1
    testJobA["mask"]["LastEvent"] = 2
    testJobA["mask"]["FirstLumi"] = 3
    testJobA["mask"]["LastLumi"] = 4
    testJobA["mask"]["FirstRun"] = 5
    testJobA["mask"]["LastRun"] = 6

    testJobA.save()

    testJobB = Job(id=testJobA["id"])
    testJobB.loadData()

    assert testJobA["mask"] == testJobB["mask"], \
        "ERROR: Job mask did not load properly"

    # Open an explicit transaction and modify the mask plus several job
    # attributes; these changes are visible until the rollback below.
    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJobA["mask"]["FirstEvent"] = 7
    testJobA["mask"]["LastEvent"] = 8
    testJobA["mask"]["FirstLumi"] = 9
    testJobA["mask"]["LastLumi"] = 10
    testJobA["mask"]["FirstRun"] = 11
    testJobA["mask"]["LastRun"] = 12
    testJobA["name"] = "stevesJob"
    testJobA["couch_record"] = "someCouchRecord"
    testJobA["location"] = "test2.site.ch"

    testJobA.save()
    testJobC = Job(id=testJobA["id"])
    testJobC.loadData()

    assert testJobA["mask"] == testJobC["mask"], \
        "ERROR: Job mask did not load properly"
    assert testJobC["name"] == "stevesJob", \
        "ERROR: Job name did not save"
    assert testJobC["couch_record"] == "someCouchRecord", \
        "ERROR: Job couch record did not save"
    assert testJobC["location"] == "test2.site.ch", \
        "ERROR: Job site did not save"

    myThread.transaction.rollback()

    # After the rollback, the mask must match the state of the first save.
    testJobD = Job(id=testJobA["id"])
    testJobD.loadData()

    assert testJobB["mask"] == testJobD["mask"], \
        "ERROR: Job mask did not load properly"
    return
def testA_FileSplitting(self):
    """
    _FileSplitting_

    Test that things work if we split files between jobs
    """
    splitter = SplitterFactory()

    oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=oneSetSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        # Fixed: assertTrue(len(...), 1) treated the 1 as a failure
        # message and always passed; the intent is an equality check.
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(job['estimatedJobTime'], 100 * 12)
        self.assertEqual(job['estimatedDiskUsage'], 100 * 400)
        self.assertEqual(job['estimatedMemoryUsage'], 2300)

    twoLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=2)
    jobFactory = splitter(package="WMCore.WMBS", subscription=twoLumiFiles)
    jobGroups = jobFactory(lumis_per_job=1,
                           halt_job_on_file_boundaries=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(job['estimatedJobTime'], 50 * 12)
        self.assertEqual(job['estimatedDiskUsage'], 50 * 400)
        self.assertEqual(job['estimatedMemoryUsage'], 2300)

    wholeLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=3)
    jobFactory = splitter(package="WMCore.WMBS", subscription=wholeLumiFiles)
    jobGroups = jobFactory(lumis_per_job=2,
                           halt_job_on_file_boundaries=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    # 10 because we split on run boundaries
    self.assertEqual(len(jobGroups[0].jobs), 10)
    jobList = jobGroups[0].jobs
    for idx, job in enumerate(jobList, start=1):
        # Each job carries one file; even-indexed jobs cover one lumi of
        # their file, odd-indexed jobs cover two.
        self.assertEqual(len(job['input_files']), 1)
        if idx % 2 == 0:
            self.assertEqual(job['estimatedJobTime'], (1.0 * round(100 / 3)) * 12)
            self.assertEqual(job['estimatedDiskUsage'], (1.0 * round(100 / 3)) * 400)
        else:
            self.assertEqual(job['estimatedJobTime'], (2.0 * round(100 / 3)) * 12)
            self.assertEqual(job['estimatedDiskUsage'], (2.0 * round(100 / 3)) * 400)
        self.assertEqual(job['estimatedMemoryUsage'], 2300)

    mask0 = jobList[0]['mask'].getRunAndLumis()
    self.assertEqual(mask0, {0: [[0, 1]]})
    mask1 = jobList[1]['mask'].getRunAndLumis()
    self.assertEqual(mask1, {0: [[2, 2]]})
    mask2 = jobList[2]['mask'].getRunAndLumis()
    self.assertEqual(mask2, {1: [[100, 101]]})
    mask3 = jobList[3]['mask'].getRunAndLumis()
    self.assertEqual(mask3, {1: [[102, 102]]})

    j0 = Job(id=jobList[0]['id'])
    j0.loadData()
    self.assertEqual(j0['mask'].getRunAndLumis(), {0: [[0, 1]]})

    # Do it with multiple sites
    twoSiteSubscription = self.createSubscription(nFiles=5, lumisPerFile=2,
                                                  twoSites=True)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=twoSiteSubscription)
    jobGroups = jobFactory(lumis_per_job=1,
                           halt_job_on_file_boundaries=True,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 2)
    self.assertEqual(len(jobGroups[0].jobs), 10)
    for job in jobGroups[0].jobs:
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(job['estimatedJobTime'], 50 * 12)
        self.assertEqual(job['estimatedDiskUsage'], 50 * 400)
        self.assertEqual(job['estimatedMemoryUsage'], 2300)
def testB_NoRunNoFileSplitting(self):
    """
    _NoRunNoFileSplitting_

    Test the splitting algorithm in the odder fringe cases that
    might be required.
    """
    splitter = SplitterFactory()
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 9)

    # The first job should have three lumis from one run
    # The second three lumis from two different runs
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[0]['estimatedJobTime'], 60 * 12)
    self.assertEqual(jobs[0]['estimatedDiskUsage'], 60 * 400)
    self.assertEqual(jobs[0]['estimatedMemoryUsage'], 2300)

    job1runLumi = jobs[1]['mask'].getRunAndLumis()
    self.assertEqual(job1runLumi[0][0][0] + 1, job1runLumi[0][0][1])  # Run 0, startLumi+1 == endLumi
    self.assertEqual(job1runLumi[1][0][0], job1runLumi[1][0][1])  # Run 1, startLumi == endLumi
    self.assertEqual(jobs[1]['estimatedJobTime'], 60 * 12)
    self.assertEqual(jobs[1]['estimatedDiskUsage'], 60 * 400)
    self.assertEqual(jobs[1]['estimatedMemoryUsage'], 2300)

    # And it should still be the same when you load it out of the database
    j1 = Job(id=jobs[1]['id'])
    j1.loadData()
    self.assertEqual(j1['mask'].getRunAndLumis(),
                     {0: [[3, 4]], 1: [[100, 100]]})

    # Assert that this works differently with file splitting on and run
    # splitting on
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=5,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=3,
                           halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 10)

    # In this case it should slice things up so that each job only has
    # one run in it.
    self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
    self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[3, 4]]})

    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=4,
                                               twoSites=False)
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=10,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 2)
    self.assertEqual(jobs[0]['mask']['runAndLumis'],
                     {0: [[0, 3]], 1: [[100, 103]], 2: [[200, 201]]})
    self.assertEqual(jobs[1]['mask']['runAndLumis'],
                     {2: [[202, 203]], 3: [[300, 303]], 4: [[400, 403]]})

    j = Job(id=jobs[0]['id'])
    j.loadData()
    self.assertEqual(len(j['input_files']), 3)
    for f in j['input_files']:
        # Fixed: assertTrue(x, y) treats y as the failure message and
        # always passes for truthy x; the intent is an equality check.
        self.assertEqual(f['events'], 100)
        self.assertEqual(f['size'], 1000)
    return