def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name=baseName) testFileset.create() parentFile = File('%s_parent' % baseName, size=1000, events=100, locations=set(["T1_US_FNAL_Disk"])) parentFile.create() for i in range(nFiles): newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100, locations="T1_US_FNAL_Disk") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = File(lfn='%s_%i_2' % (baseName, i), size=1000, events=100, locations="T2_CH_CERN") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="LumiBased", type="Processing") testSubscription.create() return testSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name=baseName) testFileset.create() for i in range(nFiles): newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T1_US_FNAL_Disk') newFile.create() testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T2_CH_CERN') newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing") testSubscription.create() return testSubscription
def testFilesWithoutOtherSubscriptions(self): """ _testFilesWithoutOtherSubscriptions_ Test the case where files only in the delete subscription can happen if cleanup of the other subscriptions is fast """ testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test") testWorkflowA.create() testFileset = Fileset(name="TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size=1000, events=100, locations=set(["somese.cern.ch"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription( fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing" ) testSubscriptionA.create() splitter = SplitterFactory() deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA) result = deleteFactoryA(files_per_job=50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def testMask(self): """ _testMask_ Test the new mask setup """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102]) testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202]) testJob.create(group=testJobGroup) loadJob = Job(id=testJob.exists()) loadJob.loadData() runs = loadJob['mask'].getRunAndLumis() self.assertEqual(len(runs), 2) self.assertEqual(runs[100], [[101, 102]]) self.assertEqual(runs[200], [[201, 202]]) bigRun = Run(100, *[101, 102, 103, 104]) badRun = Run(300, *[1001, 1002]) result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun]) self.assertEqual(len(result), 1) alteredRun = result.pop() self.assertEqual(alteredRun.run, 100) self.assertEqual(alteredRun.lumis, [101, 102]) run0 = Run(300, *[1001, 1002]) run1 = Run(300, *[1001, 1002]) loadJob['mask'].filterRunLumisByMask([run0, run1]) return
def generateFakeMCFile(self, numEvents = 100, firstEvent = 1, lastEvent = 100, firstLumi = 1, lastLumi = 10, index = 1): #MC comes with only one MCFakeFile singleMCFileset = Fileset(name = "MCTestFileset %i" % index) singleMCFileset.create() newFile = File("MCFakeFileTest %i" % index, size = 1000, events = numEvents, locations = set(["somese.cern.ch"])) newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1))) newFile["first_event"] = firstEvent newFile["last_event"] = lastEvent newFile.create() singleMCFileset.addFile(newFile) singleMCFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() singleMCFileSubscription = Subscription(fileset = singleMCFileset, workflow = testWorkflow, split_algo = "EventBased", type = "Production") singleMCFileSubscription.create() return singleMCFileSubscription
def createLargeFileBlock(self): """ _createLargeFileBlock_ Creates a large group of files for testing """ testFileset = Fileset(name = "TestFilesetX") testFileset.create() for i in range(5000): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf003", task="Test" ) testWorkflow.create() largeSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, split_algo = "FileBased", type = "Processing") largeSubscription.create() return largeSubscription
def createSubscriptionWithFileABC(self): """" _createSubscriptionWithFileABC_ Create a subscription where the input fileset has three files. Also create a second subscription that has acquired two of the files. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWorkflow2 = Workflow(spec="specBOGUS.xml", owner="Simon", name="wfBOGUS", task="Test") testWorkflow2.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=20, locations=set(["test.site.ch"])) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=20, locations=set(["test.site.ch"])) testFileB.addRun(Run(1, *[46])) testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=20, locations=set(["test.site.ch"])) testFileC.addRun(Run(2, *[48])) testFileA.create() testFileB.create() testFileC.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testSubscription2 = Subscription(fileset=testFileset, workflow=testWorkflow2) testSubscription2.create() testSubscription2.acquireFiles([testFileA, testFileB]) #return (testSubscription, testFileset, testWorkflow, testFileA, # testFileB, testFileC) return (testSubscription, testFileA, testFileB, testFileC)
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False): """ _createTestSubscription_ Create a set of test subscriptions for testing purposes. """ if nSites > self.nSites: nSites = self.nSites testFileset = Fileset(name="TestFileset") testFileset.create() # Create a testWorkflow testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() # Create the files for each site for s in range(nSites): for i in range(nFiles): newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing" ) testSubscription.create() # Close the fileset if closeFileset: testFileset.markOpen(isOpen=False) return testSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testWorkflow = Workflow(spec="spec.xml", owner="dmwm", name="testWorkflow_%s" % baseName[:4], task="Test") testWorkflow.create() testFileset = Fileset(name=baseName) for i in range(nFiles): newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T1_US_FNAL_Disk') testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile, i, lumisPerFile, 'T2_CH_CERN') testFileset.addFile(newFile) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() return testSubscription
def test_AutoIncrementCheck(self): """ _AutoIncrementCheck_ Test and see whether we can find and set the auto_increment values """ myThread = threading.currentThread() if not myThread.dialect.lower() == "mysql": return testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck") incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 1) incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 2) incrementDAO.execute(input=10) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 11) incrementDAO.execute(input=5) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 12) return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name=baseName) testFileset.create() for i in range(nFiles): newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile, i, lumisPerFile, "somese.cern.ch") newFile.create() testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile, i, lumisPerFile, "otherse.cern.ch") newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing" ) testSubscription.create() return testSubscription
def createLargeFileBlock(self): """ _createLargeFileBlock_ Creates a large group of files for testing """ testFileset = Fileset(name="TestFilesetX") testFileset.create() for _ in range(5000): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T1_US_FNAL_Disk"])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf003", task="Test") testWorkflow.create() largeSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased", type="Processing") largeSubscription.create() return largeSubscription
def testD_NonContinuousLumis(self): """ _NonContinuousLumis_ Test and see if LumiBased can work when the lumis are non continuous """ baseName = makeUUID() nFiles = 10 testFileset = Fileset(name = baseName) testFileset.create() for i in range(nFiles): newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000, events = 100, locations = "somese.cern.ch") # Set to two non-continuous lumi numbers lumis = [100 + i, 200 + i] newFile.addRun(Run(i, *lumis)) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset = testFileset, workflow = self.testWorkflow, split_algo = "LumiBased", type = "Processing") testSubscription.create() splitter = SplitterFactory() jobFactory = splitter(package = "WMCore.WMBS", subscription = testSubscription) jobGroups = jobFactory(lumis_per_job = 2, halt_job_on_file_boundaries = False, splitOnRun = False, performance = self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 10) for j in jobs: runs = j['mask'].getRunAndLumis() for r in runs.keys(): self.assertEqual(len(runs[r]), 2) for l in runs[r]: # Each run should have two lumis # Each lumi should be of form [x, x] # meaning that the first and last lumis are the same self.assertEqual(len(l), 2) self.assertEqual(l[0], l[1]) self.assertEqual(j['estimatedJobTime'], 100 * 12) self.assertEqual(j['estimatedDiskUsage'], 100 * 400) self.assertEqual(j['estimatedMemoryUsage'], 2300) return
def _createThisSubscription(self, initialCounter=1): """ Private function to create a fileset and subscription with different fileset and file names :param initialCounter: just a simple integer to be appended to files :return: an splitter instance (jobFactory) """ splitter = SplitterFactory() # Create 3 files with 100 events per lumi: # - file1 with 1 run of 8 lumis # - file2 with 2 runs of 2 lumis each # - file3 with 1 run of 5 lumis testFileset = Fileset(name='Fileset%s' % initialCounter) fileA = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=800) lumiListA = [] for lumi in range(8): lumiListA.append(10 + lumi) fileA.addRun(Run(1, *lumiListA)) fileA.setLocation("T1_US_FNAL_Disk") initialCounter = int(initialCounter) + 1 fileB = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=400) lumiListB1 = [] lumiListB2 = [] for lumi in range(2): lumiListB1.append(20 + lumi) lumiListB2.append(30 + lumi) fileB.addRun(Run(2, *lumiListB1)) fileB.addRun(Run(3, *lumiListB2)) fileB.setLocation("T1_US_FNAL_Disk") initialCounter = int(initialCounter) + 1 fileC = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=500) lumiListC = [] for lumi in range(5): lumiListC.append(40 + lumi) fileC.addRun(Run(4, *lumiListC)) fileC.setLocation("T1_US_FNAL_Disk") testFileset.addFile(fileA) testFileset.addFile(fileB) testFileset.addFile(fileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) return jobFactory
def testLargeNumberOfFiles(self): """ _testLargeNumberOfFiles_ Setup a subscription with 500 files and verify that the splitting algo works correctly. """ testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test") testWorkflowA.create() testWorkflowB = Workflow(spec="specB.xml", owner="Steve", name="wfB", task="Test") testWorkflowB.create() testFileset = Fileset(name="TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size=1000, events=100, locations=set(["T2_CH_CERN"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset=testFileset, workflow=testWorkflowA, split_algo="FileBased", type="Processing") testSubscriptionA.create() testSubscriptionB = Subscription(fileset=testFileset, workflow=testWorkflowB, split_algo="SiblingProcessingBased", type="Processing") testSubscriptionB.create() testSubscriptionA.completeFiles(allFiles) splitter = SplitterFactory() deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionB) result = deleteFactoryA(files_per_job=50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def testD_HardLimitSplittingOnly(self): """ _testD_HardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, max_events_per_lumi=800, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup") for i in range(1, 4): self.assertTrue( jobs[i - 1]['failedOnCreation'], "The job processing the second file should me marked for failure" ) self.assertEqual( jobs[i - 1]['failedReason'], "File /this/is/file%d has too many events (1000) in 1 lumi(s)" % i, "The reason for the failure is not accurate") return
def testHardLimitSplitting(self): """ _testHardLimitSplitting_ Test that we can specify a event limit, the algorithm shall take single lumi files with more events than the limit and mark them for failure """ splitter = SplitterFactory() # Create 3 files, the one in the middle is a "bad" file testFileset = Fileset(name="FilesetA") testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail single lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 5) # One job should be failed, the rest should be fine for jobNum in (0, 1, 3, 4): self.assertFalse(jobs[jobNum].get('failedOnCreation')) self.assertTrue(jobs[2]['failedOnCreation']) self.assertEqual( jobs[2]['failedReason'], 'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run' ) return
def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False): """ _createSubscription_ Create a subscription for testing """ baseName = makeUUID() testFileset = Fileset(name = baseName) testFileset.create() parentFile = File('%s_parent' % (baseName), size = 1000, events = 100, locations = set(["somese.cern.ch"])) parentFile.create() for i in range(nFiles): newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000, events = 100, locations = "somese.cern.ch") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) if twoSites: for i in range(nFiles): newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000, events = 100, locations = "otherse.cern.ch") lumis = [] for lumi in range(lumisPerFile): if rand: lumis.append(random.randint(1000 * i, 1000 * (i + 1))) else: lumis.append((100 * i) + lumi) newFile.addRun(Run(i, *lumis)) newFile.create() newFile.addParent(parentFile['lfn']) testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset = testFileset, workflow = self.testWorkflow, split_algo = "LumiBased", type = "Processing") testSubscription.create() return testSubscription
def createSubscriptionWithFileABC(self): """" _createSubscriptionWithFileABC_ Create a subscription where the input fileset has three files. Also create a second subscription that has acquired two of the files. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWorkflow2 = Workflow(spec="specBOGUS.xml", owner="Simon", name="wfBOGUS", task="Test") testWorkflow2.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=20, locations=set(["test.site.ch"])) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=20, locations=set(["test.site.ch"])) testFileB.addRun(Run(1, *[46])) testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=20, locations=set(["test.site.ch"])) testFileC.addRun(Run(2, *[48])) testFileA.create() testFileB.create() testFileC.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testSubscription2 = Subscription(fileset=testFileset, workflow=testWorkflow2) testSubscription2.create() testSubscription2.acquireFiles([testFileA, testFileB]) # return (testSubscription, testFileset, testWorkflow, testFileA, # testFileB, testFileC) return (testSubscription, testFileA, testFileB, testFileC)
def createCommonFileset(): """ Create a simple fileset with 2 files at the same location """ multipleFilesFileset = Fileset(name="TestFileset") newFile = File("/some/file/test1", size=1000, events=100) newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7])) newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7])) newFile.setLocation('T2_CH_CERN') multipleFilesFileset.addFile(newFile) newFile = File("/some/file/test2", size=2000, events=200) newFile.addRun(Run(3, *[2, 8])) newFile.addRun(Run(4, *[3, 8])) newFile.setLocation('T2_CH_CERN') multipleFilesFileset.addFile(newFile) newFile = File("/some/file/test3", size=3000, events=300) newFile.addRun(Run(5, *[10, 11, 12])) newFile.addRun(Run(6, *[10, 11, 12])) newFile.setLocation('T2_CH_CERN') multipleFilesFileset.addFile(newFile) newFile = File("/some/file/test4", size=4000, events=400) newFile.addRun(Run(2, *[3, 8, 9])) newFile.addRun(Run(3, *[3, 4, 5, 6])) newFile.setLocation('T2_CH_CERN') multipleFilesFileset.addFile(newFile) multipleFilesFileset.create() return multipleFilesFileset
def stuffWMBS(self): """ _stuffWMBS_ Inject the workflow in WMBS and add the subscriptions """ testWorkflow = Workflow(spec = os.path.join(getTestBase(), "WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl"), owner = "/CN=OU/DN=SomeoneWithPermissions", name = "BogusRequest", task = "BogusTask", owner_vogroup = "", owner_vorole = "") testWorkflow.create() testMergeWorkflow = Workflow(spec = os.path.join(getTestBase(), "WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl"), owner = "/CN=OU/DN=SomeoneWithPermissions", name = "BogusRequest", task = "BogusTask/Merge", owner_vogroup = "", owner_vorole = "") testMergeWorkflow.create() testWMBSFileset = Fileset(name = "TopFileset") testWMBSFileset.create() testWMBSFilesetUnmerged = Fileset(name = "UnmergedFileset") testWMBSFilesetUnmerged.create() testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10) testFileB.addRun(Run(10, *[12314])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFilesetUnmerged.addFile(testFileB) testWMBSFileset.commit() testWMBSFilesetUnmerged.commit() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow) testSubscription.create() testSubscriptionMerge = Subscription(fileset = testWMBSFilesetUnmerged, workflow = testMergeWorkflow, type = "Merge") testSubscriptionMerge.create() return (testSubscription, testSubscriptionMerge)
def testLargeNumberOfFiles(self): """ _testLargeNumberOfFiles_ Setup a subscription with 500 files and verify that the splitting algo works correctly. """ testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testFileset = Fileset(name = "TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset = testFileset, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") testSubscriptionA.create() testSubscriptionB = Subscription(fileset = testFileset, workflow = testWorkflowB, split_algo = "SiblingProcessingBased", type = "Processing") testSubscriptionB.create() testSubscriptionA.completeFiles(allFiles) splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = testSubscriptionB) result = deleteFactoryA(files_per_job = 50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def testC_HardLimitSplitting(self): """ _testC_HardLimitSplitting_ Test that we can specify a event limit, the algorithm shall take single lumi files with more events than the limit and mark them for failure """ splitter = SplitterFactory() # Create 3 files, the one in the middle is a "bad" file testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, max_events_per_lumi=800, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup") self.assertTrue( jobs[3]['failedOnCreation'], "The job processing the second file should me marked for failure") self.assertEqual( jobs[3]['failedReason'], "File /this/is/file2 has too many events (1000) in 1 lumi(s)", "The reason for the failure is not accurate")
def testHardLimitSplittingOnly(self): """ _testHardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileA = self.createFile("/this/is/file0", 1000, 0, 1, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file1", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file2", 1000, 2, 1, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Fail single lumis with more than 800 events and put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3) for i in range(3): num = list(jobs[i]['mask']['runAndLumis'])[0] self.assertTrue(jobs[i]['failedOnCreation']) error = 'File /this/is/file%s has a single lumi %s, in run %s' % ( num, num, num) error += ' with too many events 1000 and it woud take 12000 sec to run' self.assertEqual(jobs[i]['failedReason'], error) return
def createFileCollection(self, name, nSubs, nFiles, workflowURL='test', site=None): """ _createFileCollection_ Create a collection of files for splitting into jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec=workflowURL, owner="mnorman", name=name, task="/TestWorkload/ReReco") testWorkflow.create() for sub in range(nSubs): nameStr = '%s-%i' % (name, sub) testFileset = Fileset(name=nameStr) testFileset.create() for f in range(nFiles): # pick a random site if not site: tmpSite = 'se.%s' % (random.choice(self.sites)) else: tmpSite = 'se.%s' % (site) testFile = File(lfn="/lfn/%s/%i" % (nameStr, f), size=1024, events=10) testFile.setLocation(tmpSite) testFile.create() testFileset.addFile(testFile) testFileset.commit() testFileset.markOpen(isOpen=0) testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased") testSubscription.create() return
def testUpdateFailedDoc(self): """ _testUpdateFailedDoc_ Verify that the update function will work correctly and not throw a 500 error if the doc didn't make it into the database for some reason. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname="Locations.New") locationAction.execute("site1", seName="somese.cern.ch") testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task=self.taskName) testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="FileBased") testSubscription.create() testFileA = File(lfn="SomeLFNA", events=1024, size=2048, locations=set(["somese.cern.ch"])) testFileA.create() testFileset.addFile(testFileA) testFileset.commit() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) jobGroup = jobFactory(files_per_job=1)[0] testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Merge" testJobA["couch_record"] = str(testJobA["id"]) change.propagate([testJobA], "new", "none") testJobADoc = change.jobsdatabase.document(testJobA["couch_record"]) self.assertTrue(testJobADoc.has_key("states")) self.assertTrue(testJobADoc["states"].has_key("1")) return
def testD_HardLimitSplittingOnly(self): """ _testD_HardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch") testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing" ) testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory( halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, max_events_per_lumi=800, performance=self.performanceParams, ) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup") for i in range(1, 4): self.assertTrue( jobs[i - 1]["failedOnCreation"], "The job processing the second file should me marked for failure" ) self.assertEqual( jobs[i - 1]["failedReason"], "File /this/is/file%d has too many events (1000) in 1 lumi(s)" % i, "The reason for the failure is not accurate", ) return
def createTestJobGroup(self): """ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for i in range(0, self.nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) testJobGroup.commit() return testJobGroup
def testD_HardLimitSplittingOnly(self): """ _testD_HardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup") for i in range(1, 4): self.assertTrue(jobs[i - 1]['failedOnCreation'], "The job processing the second file should me marked for failure") error = 'File /this/is/file%s has a single lumi %d, in run %s' % (i, i - 1, i - 1) error += ' with too many events 1000 and it woud take 12000 sec to run' self.assertEqual(jobs[i - 1]['failedReason'], error) return
def testFilesWithoutOtherSubscriptions(self): """ _testFilesWithoutOtherSubscriptions_ Test the case where files only in the delete subscription can happen if cleanup of the other subscriptions is fast """ testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test") testWorkflowA.create() testFileset = Fileset(name="TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size=1000, events=100, locations=set(["T2_CH_CERN"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing") testSubscriptionA.create() splitter = SplitterFactory() deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA) result = deleteFactoryA(files_per_job=50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
def createTestJobGroup(self): """ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for _ in range(0, self.nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) testJobGroup.commit() return testJobGroup
def testC_HardLimitSplitting(self): """ _testC_HardLimitSplitting_ Test that we can specify a event limit, the algorithm shall take single lumi files with more events than the limit and mark them for failure """ splitter = SplitterFactory() # Create 3 files, the one in the middle is a "bad" file testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "somese.cern.ch") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch") testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "somese.cern.ch") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription( fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing" ) testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory( halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, max_events_per_lumi=800, performance=self.performanceParams, ) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup") self.assertTrue(jobs[3]["failedOnCreation"], "The job processing the second file should me marked for failure") self.assertEqual( jobs[3]["failedReason"], "File /this/is/file2 has too many events (1000) in 1 lumi(s)", "The reason for the failure is not accurate", )
def testUpdateFailedDoc(self): """ _testUpdateFailedDoc_ Verify that the update function will work correctly and not throw a 500 error if the doc didn't make it into the database for some reason. """ change = ChangeState(self.config, "changestate_t") locationAction = self.daoFactory(classname = "Locations.New") locationAction.execute("site1", seName = "somese.cern.ch") testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task = self.taskName) testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, split_algo = "FileBased") testSubscription.create() testFileA = File(lfn = "SomeLFNA", events = 1024, size = 2048, locations = set(["somese.cern.ch"])) testFileA.create() testFileset.addFile(testFileA) testFileset.commit() splitter = SplitterFactory() jobFactory = splitter(package = "WMCore.WMBS", subscription = testSubscription) jobGroup = jobFactory(files_per_job = 1)[0] testJobA = jobGroup.jobs[0] testJobA["user"] = "******" testJobA["group"] = "DMWM" testJobA["taskType"] = "Merge" testJobA["couch_record"] = str(testJobA["id"]) change.propagate([testJobA], "new", "none") testJobADoc = change.jobsdatabase.document(testJobA["couch_record"]) self.assertTrue("states" in testJobADoc) self.assertTrue("1" in testJobADoc["states"]) return
def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'): """ _createJobCollection_ Create a collection of jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec=workflowURL, owner="mnorman", name=name, task="/TestWorkload/ReReco") testWorkflow.create() for sub in range(nSubs): nameStr = '%s-%i' % (name, sub) myThread.transaction.begin() testFileset = Fileset(name=nameStr) testFileset.create() for f in range(nFiles): # pick a random site site = random.choice(self.sites) testFile = File(lfn="/lfn/%s/%i" % (nameStr, f), size=1024, events=10) testFile.setLocation(site) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased") testSubscription.create() myThread.transaction.commit() return
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10, index=1, existingSub=None): """ _generateFakeMCFile_ Generates a fake MC file for testing production EventBased creation of jobs, it creates a single file subscription if no existing subscription is provided. """ # MC comes with MCFakeFile(s) newFile = File("MCFakeFile-some-hash-%s" % str(index).zfill(5), size=1000, events=numEvents, locations=set(["somese.cern.ch"])) newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1))) newFile["first_event"] = firstEvent newFile["last_event"] = lastEvent newFile.create() if existingSub is None: singleMCFileset = Fileset(name="MCTestFileset-%i" % index) singleMCFileset.create() singleMCFileset.addFile(newFile) singleMCFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() singleMCFileSubscription = Subscription(fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production") singleMCFileSubscription.create() return singleMCFileSubscription else: existingSub['fileset'].addFile(newFile) existingSub['fileset'].commit() return existingSub
def testHardLimitSplitting(self): """ _testHardLimitSplitting_ Test that we can specify a event limit, the algorithm shall take single lumi files with more events than the limit and mark them for failure """ splitter = SplitterFactory() # Create 3 files, the one in the middle is a "bad" file testFileset = Fileset(name="FilesetA") testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail single lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 5) # One job should be failed, the rest should be fine for jobNum in (0, 1, 3, 4): self.assertFalse(jobs[jobNum].get('failedOnCreation')) self.assertTrue(jobs[2]['failedOnCreation']) self.assertEqual(jobs[2]['failedReason'], 'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run') return
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False): """ _createTestSubscription_ Create a set of test subscriptions for testing purposes. """ if nSites > self.nSites: nSites = self.nSites testFileset = Fileset(name="TestFileset") testFileset.create() # Create a testWorkflow testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() # Create the files for each site for s in range(nSites): for i in range(nFiles): newFile = File(makeUUID(), size=1024, events=100, locations=set(["T2_CH_CERN_%i" % s])) newFile.create() testFileset.addFile(newFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing") testSubscription.create() # Close the fileset if closeFileset: testFileset.markOpen(isOpen=False) return testSubscription
def createJobCollection(self, name, nSubs, nFiles, workflowURL = 'test'): """ _createJobCollection_ Create a collection of jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec = workflowURL, owner = "mnorman", name = name, task="/TestWorkload/ReReco") testWorkflow.create() for sub in range(nSubs): nameStr = '%s-%i' % (name, sub) myThread.transaction.begin() testFileset = Fileset(name = nameStr) testFileset.create() for f in range(nFiles): # pick a random site site = random.choice(self.sites) testFile = File(lfn = "/lfn/%s/%i" % (nameStr, f), size = 1024, events = 10) testFile.setLocation(site) testFile.create() testFileset.addFile(testFile) testFileset.commit() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() myThread.transaction.commit() return
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow) testSubscription.create() testFileA = File(lfn = makeUUID(), locations = "test.site.ch") testFileB = File(lfn = makeUUID(), locations = "test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testJob = Job() testJob.create(group = testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def testE_DisableHardLimitSplitting(self): """ _testC_DisableHardLimitSplitting_ Test that we can bypass and event limit when allowCreationFailure is set to False. The algorithm shall take single lumi files with more events than the limit but not mark them for failure """ splitter = SplitterFactory() # Create 3 files, the one in the middle is a "bad" file testFileset = Fileset(name="FilesetA") testFileset.create() testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Settings are to split on job boundaries, to fail sing lumis with more than 800 events # and to put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, allowCreationFailure=False, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1, "There should be only one job group") jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup") failedJobs = [job for job in jobs if job.get('failedOnCreation', False)] self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")
def createFileCollection(self, name, nSubs, nFiles, workflowURL = 'test', site = None): """ _createFileCollection_ Create a collection of files for splitting into jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec = workflowURL, owner = "mnorman", name = name, task="/TestWorkload/ReReco") testWorkflow.create() for sub in range(nSubs): nameStr = '%s-%i' % (name, sub) testFileset = Fileset(name = nameStr) testFileset.create() for f in range(nFiles): # pick a random site if not site: tmpSite = 'se.%s' % (random.choice(self.sites)) else: tmpSite = 'se.%s' % (site) testFile = File(lfn = "/lfn/%s/%i" % (nameStr, f), size = 1024, events = 10) testFile.setLocation(tmpSite) testFile.create() testFileset.addFile(testFile) testFileset.commit() testFileset.markOpen(isOpen = 0) testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() return
def testHardLimitSplittingOnly(self): """ _testHardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileA = self.createFile("/this/is/file0", 1000, 0, 1, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file1", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file2", 1000, 2, 1, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Fail single lumis with more than 800 events and put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, job_time_limit=9600, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3) for i in range(3): num = jobs[i]['mask']['runAndLumis'].keys()[0] self.assertTrue(jobs[i]['failedOnCreation']) error = 'File /this/is/file%s has a single lumi %s, in run %s' % (num, num, num) error += ' with too many events 1000 and it woud take 12000 sec to run' self.assertEqual(jobs[i]['failedReason'], error) return
def generateFakeMCFile(self, numEvents = 100, firstEvent = 1, lastEvent = 100, firstLumi = 1, lastLumi = 10, index = 1, existingSub = None): """ _generateFakeMCFile_ Generates a fake MC file for testing production EventBased creation of jobs, it creates a single file subscription if no existing subscription is provided. """ # MC comes with MCFakeFile(s) newFile = File("MCFakeFile-some-hash-%s" % str(index).zfill(5), size = 1000, events = numEvents, locations = set(["somese.cern.ch"])) newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1))) newFile["first_event"] = firstEvent newFile["last_event"] = lastEvent newFile.create() if existingSub is None: singleMCFileset = Fileset(name = "MCTestFileset-%i" % index) singleMCFileset.create() singleMCFileset.addFile(newFile) singleMCFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task = "Test") testWorkflow.create() singleMCFileSubscription = Subscription(fileset = singleMCFileset, workflow = testWorkflow, split_algo = "EventBased", type = "Production") singleMCFileSubscription.create() return singleMCFileSubscription else: existingSub['fileset'].addFile(newFile) existingSub['fileset'].commit() return existingSub
def testHardLimitSplittingOnly(self): """ _testHardLimitSplittingOnly_ Checks that we can split a set of files where every file has a single lumi too big to fit in a runnable job """ splitter = SplitterFactory() # Create 3 single-big-lumi files testFileset = Fileset(name="FilesetA") testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "T1_US_FNAL_Disk") testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk") testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "T1_US_FNAL_Disk") testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiByWork", type="Processing") testSubscription.create() jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription) # Fail single lumis with more than 800 events and put 550 events per job jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550, max_events_per_lumi=800, performance=self.performanceParams) self.assertEqual(len(jobGroups), 1) jobs = jobGroups[0].jobs self.assertEqual(len(jobs), 3) for job in jobs: self.assertTrue(job['failedOnCreation']) self.assertIn("Too many (estimated) events (1000.0) in", job['failedReason']) return
class RepackTest(unittest.TestCase): """ _RepackTest_ Test for Repack job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["T0.WMBS"]) self.splitterFactory = SplitterFactory(package="T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE') """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE2') """, transaction=False) insertRunDAO = daoFactory(classname="RunConfig.InsertRun") insertRunDAO.execute(binds={ 'RUN': 1, 'TIME': int(time.time()), 'HLTKEY': "someHLTKey" }, transaction=False) insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection") for lumi in [1, 2, 3, 4]: insertLumiDAO.execute(binds={ 'RUN': 1, 'LUMI': lumi }, transaction=False) insertStreamDAO = daoFactory(classname="RunConfig.InsertStream") insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False) insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "A", "TestFileset1") self.fileset1 = Fileset(name="TestFileset1") self.fileset1.load() workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset=self.fileset1, workflow=workflow1, split_algo="Repack", type="Repack") self.subscription1.create() # keep for later self.insertClosedLumiDAO = daoFactory( classname="RunLumiCloseout.InsertClosedLumi") self.currentTime = int(time.time()) # default split parameters self.splitArgs = {} self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024 self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024 self.splitArgs['maxInputEvents'] = 500000 self.splitArgs['maxInputFiles'] = 1000 return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def getNumActiveSplitLumis(self): """ _getNumActiveSplitLumis_ helper function that counts the number of active split lumis """ myThread = threading.currentThread() results = myThread.dbi.processData("""SELECT COUNT(*) FROM lumi_section_split_active """, transaction=False)[0].fetchall() return results[0][0] def test00(self): """ _test00_ Test that the job name prefix feature works Test multi lumi size threshold Multi lumi input """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi'] jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxSizeMultiLumi'] = 5000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("Repack-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("Repack-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test01(self): """ _test01_ Test multi lumi event threshold Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test02(self): """ _test02_ Test single lumi size threshold Single lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1]: filecount = 8 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxSizeSingleLumi'] = 6500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 6, "ERROR: Job does not process 6 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.assertEqual(self.getNumActiveSplitLumis(), 1, "ERROR: Split lumis were not created") return def test03(self): """ _test03_ Test single lumi event threshold Single lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1]: filecount = 8 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 650 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 6, "ERROR: Job does not process 6 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.assertEqual(self.getNumActiveSplitLumis(), 1, "ERROR: Split lumis were not created") return def test04(self): """ _test04_ Test streamer count threshold (only multi lumi) Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.assertEqual(self.getNumActiveSplitLumis(), 0, "ERROR: Split lumis were created") return def test05(self): """ _test05_ Test repacking of multiple lumis with holes in the lumi sequence Multi lumi input """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 4]: filecount = 2 for i in range(filecount): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds={ 'RUN': 1, 'LUMI': 3, 'STREAM': "A", 'FILECOUNT': 0, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }, transaction=False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4, "ERROR: first job does not process 4 files") return def test06(self): """ _test06_ Test repacking of 3 lumis 2 small lumis (single job), followed by a big one (multiple jobs) files for lumi 1 and 2 are below multi-lumi thresholds files for lumi 3 are above single-lumi threshold """ mySplitArgs = self.splitArgs.copy() insertClosedLumiBinds = [] for lumi in [1, 2, 3]: filecount = 2 for i in range(filecount): if lumi == 3: nevents = 500 else: nevents = 100 newFile = File(makeUUID(), size=1000, events=nevents) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomeSE", immediateSave=False) newFile.create() self.fileset1.addFile(newFile) insertClosedLumiBinds.append({ 'RUN': 1, 'LUMI': lumi, 'STREAM': "A", 'FILECOUNT': filecount, 'INSERT_TIME': self.currentTime, 'CLOSE_TIME': self.currentTime }) self.fileset1.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1) self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds, transaction=False) mySplitArgs['maxInputEvents'] = 900 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 3, "ERROR: JobFactory didn't create three jobs") self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4, "ERROR: first job does not process 4 files") self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1, "ERROR: second job does not process 1 file") self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1, "ERROR: third job does not process 1 file") return
class SizeBasedTest(unittest.TestCase): """ _SizeBasedTest_ Test size based job splitting. """ def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", seName = "somese.cern.ch") locationAction.execute(siteName = "site2", seName = "otherse.cern.ch") self.multipleFileFileset = Fileset(name = "TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name = "TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleSiteFileset = Fileset(name = "TestFileset3") self.multipleSiteFileset.create() for i in range(5): newFile = File(makeUUID(), size = 1000, events = 100) newFile.setLocation("somese.cern.ch") newFile.create() self.multipleSiteFileset.addFile(newFile) for i in range(5): newFile = File(makeUUID(), size = 1000, events = 100) newFile.setLocation(["somese.cern.ch","otherse.cern.ch"]) newFile.create() self.multipleSiteFileset.addFile(newFile) self.multipleSiteFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset, workflow = testWorkflow, split_algo = "SizeBased", type = "Processing") self.multipleFileSubscription.create() self.singleFileSubscription = Subscription(fileset = self.singleFileFileset, workflow = testWorkflow, split_algo = "SizeBased", type = "Processing") self.singleFileSubscription.create() self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset, workflow = testWorkflow, split_algo = "SizeBased", type = "Processing") self.multipleSiteSubscription.create() return def tearDown(self): """ _tearDown_ Clear out WMBS. """ myThread = threading.currentThread() if myThread.transaction == None: myThread.transaction = Transaction(self.dbi) myThread.transaction.begin() factory = WMFactory("WMBS", "WMCore.WMBS") destroy = factory.loadObject(myThread.dialect + ".Destroy") destroyworked = destroy.execute(conn = myThread.transaction.conn) if not destroyworked: raise Exception("Could not complete WMBS tear down.") myThread.transaction.commit() return def testExactEvents(self): """ _testExactEvents_ Test event based job splitting when the number of events per job is exactly the same as the number of events in the input file. """ splitter = SplitterFactory() jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(size_per_job = 1000) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." return def testMultipleFiles(self): """ _testMultipleFiles_ Tests the mechanism for splitting up multiple files into jobs with a variety of different arguments. """ splitter = SplitterFactory() jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(size_per_job = 1000) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 10) for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 1) return def testMultipleFiles2000(self): """ _testMultipleFiles2000_ Tests the mechanism for splitting up multiple files into jobs with a variety of different arguments. """ splitter = SplitterFactory() jobFactory = splitter(self.multipleFileSubscription) #Test it with two files per job jobGroups = jobFactory(size_per_job = 2000) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 5) for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 2) return def testMultipleFiles2500(self): """ _testMultipleFiles2500_ Tests the mechanism for splitting up multiple files into jobs with a variety of different arguments. """ splitter = SplitterFactory() jobFactory = splitter(self.multipleFileSubscription) #Now test it with a size that can't be broken up evenly jobGroups = jobFactory(size_per_job = 2500) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 5) for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 2) return def testMultipleFiles500(self): """ _testMultipleFiles500_ Tests the mechanism for splitting up multiple files into jobs with a variety of different arguments. """ splitter = SplitterFactory() jobFactory = splitter(self.multipleFileSubscription) #Test it with something too small to handle; should return one job per file, plus one extra #open at the end jobGroups = jobFactory(size_per_job = 500) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 10) return def testMultipleSites(self): """ _testMultipleSites_ Tests how to break up files at different locations """ splitter = SplitterFactory() jobFactory = splitter(self.multipleSiteSubscription) jobGroups = jobFactory(size_per_job = 1000) self.assertEqual(len(jobGroups), 2) self.assertEqual(len(jobGroups[0].jobs), 5) for job in jobGroups[0].jobs: self.assertEqual(len(job.getFiles()), 1)
class SiblingProcessingBasedTest(unittest.TestCase): """ _SiblingProcessingBasedTest_ Test SiblingProcessing job splitting. """ def setUp(self): """ _setUp_ Setup the database connections and schema. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute("T2_CH_CERN", pnn = "T2_CH_CERN") locationAction.execute("T1_US_FNAL", pnn = "T1_US_FNAL_Disk") self.testFilesetA = Fileset(name = "FilesetA") self.testFilesetA.create() self.testFilesetB = Fileset(name = "FilesetB") self.testFilesetB.create() self.testFileA = File("testFileA", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileA.create() self.testFileB = File("testFileB", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileB.create() self.testFileC = File("testFileC", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileC.create() self.testFilesetA.addFile(self.testFileA) self.testFilesetA.addFile(self.testFileB) self.testFilesetA.addFile(self.testFileC) self.testFilesetA.commit() self.testFileD = File("testFileD", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileD.create() self.testFileE = File("testFileE", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileE.create() self.testFileF = File("testFileF", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) self.testFileF.create() self.testFilesetB.addFile(self.testFileD) self.testFilesetB.addFile(self.testFileE) self.testFilesetB.addFile(self.testFileF) self.testFilesetB.commit() testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testWorkflowC = Workflow(spec = "specC.xml", owner = "Steve", name = "wfC", task = "Test") testWorkflowC.create() testWorkflowD = Workflow(spec = "specD.xml", owner = "Steve", name = "wfD", task = "Test") testWorkflowD.create() self.testSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") self.testSubscriptionA.create() self.testSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = testWorkflowB, split_algo = "FileBased", type = "Processing") self.testSubscriptionB.create() self.testSubscriptionC = Subscription(fileset = self.testFilesetB, workflow = testWorkflowC, split_algo = "FileBased", type = "Processing") self.testSubscriptionC.create() self.testSubscriptionD = Subscription(fileset = self.testFilesetB, workflow = testWorkflowD, split_algo = "FileBased", type = "Processing") self.testSubscriptionD.create() deleteWorkflow = Workflow(spec = "specE.xml", owner = "Steve", name = "wfE", task = "Test") deleteWorkflow.create() self.deleteSubscriptionA = Subscription(fileset = self.testFilesetA, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionA.create() self.deleteSubscriptionB = Subscription(fileset = self.testFilesetB, workflow = deleteWorkflow, split_algo = "SiblingProcessingBased", type = "Cleanup") self.deleteSubscriptionB.create() return def tearDown(self): """ _tearDown_ Clear out WMBS. """ self.testInit.clearDatabase() return def testSiblingProcessing(self): """ _testSiblingProcessing_ Verify that the sibling processing split works correctly dealing with failed files and acquiring files correctly. """ splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionA) deleteFactoryB = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionB) result = deleteFactoryA() assert len(result) == 0, \ "Error: No jobs should be returned." result = deleteFactoryB() assert len(result) == 0, \ "Error: No jobs should be returned." self.testSubscriptionA.completeFiles(self.testFileA) result = deleteFactoryA(files_per_job = 1) assert len(result) == 1, \ "Error: Only one jobgroup should be returned." assert len(result[0].jobs) == 1, \ "Error: There should only be one job in the jobgroup." assert result[0].jobs[0]["possiblePSN"] == set(["T2_CH_CERN"]), \ "Error: possiblePSN is wrong." assert len(result[0].jobs[0]["input_files"]) == 1, \ "Error: Job should only have one input file." assert result[0].jobs[0]["input_files"][0]["lfn"] == "testFileA", \ "Error: Input file for job is wrong." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: Second subscription should have no jobs." result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." self.testSubscriptionB.completeFiles(self.testFileD) self.testSubscriptionC.failFiles(self.testFileD) result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." self.testSubscriptionD.failFiles(self.testFileD) result = deleteFactoryA(files_per_job = 1) assert len(result) == 0, \ "Error: No jobs should have been created." result = deleteFactoryB(files_per_job = 1) assert len(result) == 0, \ "Error: No job groups should have been created." self.testSubscriptionB.completeFiles([self.testFileE, self.testFileF]) self.testSubscriptionC.completeFiles([self.testFileE, self.testFileF]) self.testSubscriptionD.completeFiles([self.testFileE, self.testFileF]) result = deleteFactoryB(files_per_job = 10) assert len(result) == 0, \ "Error: No jobs should have been created." self.testFilesetB.markOpen(False) result = deleteFactoryB(files_per_job = 10) assert len(result) == 1, \ "Error: One jobgroup should have been returned." assert len(result[0].jobs) == 1, \ "Error: There should only be one job in the jobgroup." assert len(result[0].jobs[0]["input_files"]) == 2, \ "Error: Job should only have one input file." lfns = [result[0].jobs[0]["input_files"][0]["lfn"], result[0].jobs[0]["input_files"][1]["lfn"]] assert "testFileE" in lfns, \ "Error: TestFileE missing from job input." assert "testFileF" in lfns, \ "Error: TestFileF missing from job input." self.assertEqual(len(self.deleteSubscriptionB.availableFiles()), 0, "Error: There should be no available files.") completeFiles = self.deleteSubscriptionB.filesOfStatus("Completed") self.assertEqual(len(completeFiles), 1, "Error: There should only be one complete file.") self.assertEqual(list(completeFiles)[0]["lfn"], "testFileD", "Error: Test file D should be complete.") return def testMultipleLocations(self): """ _testMultipleLocations_ Verify that the sibling processing based algorithm doesn't create jobs that run over files at multiple sites. """ testFile1 = File("testFile1", size = 1000, events = 100, locations = set(["T1_US_FNAL_Disk"])) testFile1.create() testFile2 = File("testFile2", size = 1000, events = 100, locations = set(["T1_US_FNAL_Disk"])) testFile2.create() testFile3 = File("testFile3", size = 1000, events = 100, locations = set(["T1_US_FNAL_Disk"])) testFile3.create() self.testFilesetA.addFile(testFile1) self.testFilesetA.addFile(testFile2) self.testFilesetA.addFile(testFile3) self.testFilesetA.commit() self.testFilesetA.markOpen(False) self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3]) self.testSubscriptionA.completeFiles([self.testFileA, self.testFileB, self.testFileC]) splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = self.deleteSubscriptionA) result = deleteFactoryA(files_per_job = 50) assert len(result) == 2, \ "Error: Wrong number of jobgroups returned." goldenFilesA = ["testFileA", "testFileB", "testFileC"] goldenFilesB = ["testFile1", "testFile2", "testFile3"] for jobGroup in result: assert len(jobGroup.jobs) == 1, \ "Error: Wrong number of jobs in jobgroup." assert len(jobGroup.jobs[0]["input_files"]) == 3, \ "Error: Wrong number of input files in job." jobSite = jobGroup.jobs[0]["possiblePSN"] assert (jobSite == set(["T2_CH_CERN"]) or jobSite == set(["T1_US_FNAL"])), \ "Error: Wrong site for job." if jobSite == set(["T2_CH_CERN"]): goldenFiles = goldenFilesA else: goldenFiles = goldenFilesB for jobFile in jobGroup.jobs[0]["input_files"]: goldenFiles.remove(jobFile["lfn"]) assert len(goldenFiles) == 0, \ "Error: Files are missing." return def testLargeNumberOfFiles(self): """ _testLargeNumberOfFiles_ Setup a subscription with 500 files and verify that the splitting algo works correctly. """ testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve", name = "wfB", task = "Test") testWorkflowB.create() testFileset = Fileset(name = "TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset = testFileset, workflow = testWorkflowA, split_algo = "FileBased", type = "Processing") testSubscriptionA.create() testSubscriptionB = Subscription(fileset = testFileset, workflow = testWorkflowB, split_algo = "SiblingProcessingBased", type = "Processing") testSubscriptionB.create() testSubscriptionA.completeFiles(allFiles) splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = testSubscriptionB) result = deleteFactoryA(files_per_job = 50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return def testFilesWithoutOtherSubscriptions(self): """ _testFilesWithoutOtherSubscriptions_ Test the case where files only in the delete subscription can happen if cleanup of the other subscriptions is fast """ testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve", name = "wfA", task = "Test") testWorkflowA.create() testFileset = Fileset(name = "TestFileset") testFileset.create() allFiles = [] for i in range(500): testFile = File(str(i), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) testFile.create() allFiles.append(testFile) testFileset.addFile(testFile) testFileset.commit() testSubscriptionA = Subscription(fileset = testFileset, workflow = testWorkflowA, split_algo = "SiblingProcessingBased", type = "Processing") testSubscriptionA.create() splitter = SplitterFactory() deleteFactoryA = splitter(package = "WMCore.WMBS", subscription = testSubscriptionA) result = deleteFactoryA(files_per_job = 50) self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.") self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.") return
class EventBasedTest(unittest.TestCase): """ _EventBasedTest_ Test event based job splitting. """ def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName="site1", pnn="T2_CH_CERN") self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN") newFile.addRun(Run(i, *[45 + i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations="T2_CH_CERN") newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileRunset = Fileset(name="TestFileset3") self.multipleFileRunset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN") newFile.addRun(Run(i / 3, *[45])) newFile.create() self.multipleFileRunset.addFile(newFile) self.multipleFileRunset.commit() self.singleRunFileset = Fileset(name="TestFileset4") self.singleRunFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN") newFile.addRun(Run(1, *[45])) newFile.create() self.singleRunFileset.addFile(newFile) self.singleRunFileset.commit() self.singleRunMultipleLumi = Fileset(name="TestFileset5") self.singleRunMultipleLumi.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN") newFile.addRun(Run(1, *[45 + i])) newFile.create() self.singleRunMultipleLumi.addFile(newFile) self.singleRunMultipleLumi.commit() testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="RunBased", type="Processing") self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="RunBased", type="Processing") self.multipleRunSubscription = Subscription( fileset=self.multipleFileRunset, workflow=testWorkflow, split_algo="RunBased", type="Processing") self.singleRunSubscription = Subscription( fileset=self.singleRunFileset, workflow=testWorkflow, split_algo="RunBased", type="Processing") self.singleRunMultipleLumiSubscription = Subscription( fileset=self.singleRunMultipleLumi, workflow=testWorkflow, split_algo="RunBased", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleRunSubscription.create() self.singleRunSubscription.create() self.singleRunMultipleLumiSubscription.create() return def tearDown(self): """ _tearDown_ Tear down WMBS architechture. """ self.testInit.clearDatabase() return def testExactRuns(self): """ _testExactRuns_ Test run based job splitting when the number of events per job is exactly the same as the number of events in the input file. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." return def testMoreRuns(self): """ _testMoreEvents_ Test run based job splitting when the number of runs per job is greater than the number of runs in the input file. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=2) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." return def testMultipleRuns(self): """ _testMultipleRuns_ Test run based job splitting when the number of runs is equal to the number in each input file, with multiple files """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=1) assert len(jobGroups) == 10, \ "ERROR: JobFactory didn't return one JobGroup per run." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't put each run in a file." self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 1) return def testMultipleRunsCombine(self): """ _testMultipleRunsCombine_ Test run based job splitting when the number of jobs is less then the number of files, with multiple files """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleRunSubscription) jobGroups = jobFactory(files_per_job=2) assert len(jobGroups) == 4, \ "ERROR: JobFactory didn't return one JobGroup per run." assert len(jobGroups[1].jobs) == 2, \ "ERROR: JobFactory didn't put only one job in the first job" #Last one in the queue should have one job, previous two (three files per run) self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 1) self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 2) return def testSingleRunsCombineUneven(self): """ _testSingleRunsCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. """ #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleRunSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return def testPersistSingleRunsCombineUneven(self): """ _testPerisistSingleRunsCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. """ #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleRunSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return def testSingleRunsMultipleLumiCombineUneven(self): """ _testSingleRunsMultipeLumiCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. """ #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter( package="WMCore.WMBS", subscription=self.singleRunMultipleLumiSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return
class ExpressMergeTest(unittest.TestCase): """ _ExpressMergeTest_ Test for ExpressMerge job splitter """ def setUp(self): """ _setUp_ """ import WMQuality.TestInit WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious") self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMComponent.DBS3Buffer", "T0.WMBS"]) self.splitterFactory = SplitterFactory(package="T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_pnns (id, pnn) VALUES (2, 'SomePNN') """, transaction=False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 2) """, transaction=False) insertRunDAO = daoFactory(classname="RunConfig.InsertRun") insertRunDAO.execute(binds={ 'RUN': 1, 'HLTKEY': "someHLTKey" }, transaction=False) insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection") for lumi in range(1, 5): insertLumiDAO.execute(binds={ 'RUN': 1, 'LUMI': lumi }, transaction=False) insertStreamDAO = daoFactory(classname="RunConfig.InsertStream") insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False) insertStreamFilesetDAO = daoFactory( classname="RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "Express", "TestFileset1") fileset1 = Fileset(name="TestFileset1") self.fileset2 = Fileset(name="TestFileset2") fileset1.load() self.fileset2.create() workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test") workflow2 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow2", task="Test") workflow1.create() workflow2.create() self.subscription1 = Subscription(fileset=fileset1, workflow=workflow1, split_algo="Express", type="Express") self.subscription2 = Subscription(fileset=self.fileset2, workflow=workflow2, split_algo="ExpressMerge", type="ExpressMerge") self.subscription1.create() self.subscription2.create() myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id), transaction=False) # keep for later self.insertSplitLumisDAO = daoFactory( classname="JobSplitting.InsertSplitLumis") # default split parameters self.splitArgs = {} self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024 self.splitArgs['maxInputFiles'] = 500, self.splitArgs['maxLatency'] = 15 * 23 return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def deleteSplitLumis(self): """ _deleteSplitLumis_ """ myThread = threading.currentThread() myThread.dbi.processData("""DELETE FROM lumi_section_split_active """, transaction=False) return def test00(self): """ _test00_ Test that the job name prefix feature works Test latency trigger (wait and 0) """ mySplitArgs = self.splitArgs.copy() for lumi in [1]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxLatency'] = 0 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("ExpressMerge-"), "ERROR: Job has wrong name") return def test01(self): """ _test01_ Test size and event triggers for single lumis (they are ignored) Test latency trigger (timed out) """ mySplitArgs = self.splitArgs.copy() for lumi in [1]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) mySplitArgs['maxInputSize'] = 1 mySplitArgs['maxInputFiles'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") time.sleep(1) mySplitArgs['maxLatency'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") return def test02(self): """ _test02_ Test input files threshold on multi lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) mySplitArgs['maxInputFiles'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") time.sleep(1) mySplitArgs['maxLatency'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") return def test03(self): """ _test03_ Test input size threshold on multi lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) mySplitArgs['maxInputSize'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") time.sleep(1) mySplitArgs['maxLatency'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") return def test04(self): """ _test04_ Test multi lumis express merges """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) time.sleep(1) mySplitArgs['maxLatency'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") return def test05(self): """ _test05_ Test multi lumis express merges with holes """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 4]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) time.sleep(1) mySplitArgs['maxLatency'] = 1 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") return def test06(self): """ _test06_ Test active split lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1]: for i in range(2): newFile = File(makeUUID(), size=1000, events=100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave=False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2) self.insertSplitLumisDAO.execute(binds={ 'SUB': self.subscription1['id'], 'LUMI': 1, 'NFILES': 5 }) mySplitArgs['maxLatency'] = 0 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.deleteSplitLumis() jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create a single job") return
def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return
class FixedDelayTest(unittest.TestCase): def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName="site1", pnn="T2_CH_CERN") self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(i, *[45 + i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name="TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45 + i / 3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name="TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.multipleLumiSubscription = Subscription( fileset=self.multipleFileLumiset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.singleLumiSubscription = Subscription( fileset=self.singleLumiFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() return def tearDown(self): """ _tearDown_ Nothing to do... """ self.testInit.clearDatabase() return def testNone(self): """ _testNone_ Since the subscriptions are open, we shouldn't get any jobs back """ splitter = SplitterFactory() jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") return def testClosed(self): """ _testClosed_ Since the subscriptions are closed and none of the files have been acquired, all of the files should show up """ splitter = SplitterFactory() self.singleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs), 1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) self.multipleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs), 1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) #self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) def testAllAcquired(self): """ _testAllAcquired_ should all return no job groups """ splitter = SplitterFactory() self.singleFileSubscription.acquireFiles( self.singleFileSubscription.availableFiles()) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.acquireFiles( self.multipleFileSubscription.availableFiles()) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleLumiSubscription.acquireFiles( self.multipleLumiSubscription.availableFiles()) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.acquireFiles( self.singleLumiSubscription.availableFiles()) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") def testClosedSomeAcquired(self): """ _testClosedSomeAcquired_ since the subscriptions are closed and none of the files ahve been acquired, all of the files should show up """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) self.singleFileSubscription.acquireFiles( [self.singleFileSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.getFileset().markOpen(False) self.multipleFileSubscription.acquireFiles( [self.multipleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.multipleLumiSubscription.getFileset().markOpen(False) self.multipleLumiSubscription.acquireFiles( [self.multipleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.singleLumiSubscription.getFileset().markOpen(False) self.singleLumiSubscription.acquireFiles( [self.singleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.assertEquals(len(myfiles), 9)
workload.save(workloadPath) myThread = threading.currentThread() myThread.transaction.begin() for workloadTask in workload.taskIterator(): inputFileset = Fileset(name=workloadTask.getPathName()) inputFileset.create() virtualFile = File(lfn="%s-virtual-input" % workloadTask.getPathName(), size=0, events=numEvents, locations=set([ "cmssrm.fnal.gov", "storm-fe-cms.cr.cnaf.infn.it", "cmssrm-fzk.gridka.de", "srm2.grid.sinica.edu.tw", "srm-cms.gridpp.rl.ac.uk", "ccsrm.in2p3.fr", "srmcms.pic.es" ]), merged=False) myRun = Run(runNumber=1) myRun.lumis.append(1) virtualFile.addRun(myRun) virtualFile.create() inputFileset.addFile(virtualFile) inputFileset.commit() myWMBSHelper = WMBSHelper(workload) myWMBSHelper.createSubscription(workloadTask.getPathName()) myThread.transaction.commit()
class ConditionTest(unittest.TestCase): """ _ExpressTest_ Test for Express job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["T0.WMBS"]) self.splitterFactory = SplitterFactory(package = "T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE') """, transaction = False) insertRunDAO = daoFactory(classname = "RunConfig.InsertRun") insertRunDAO.execute(binds = { 'RUN' : 1, 'TIME' : int(time.time()), 'HLTKEY' : "someHLTKey" }, transaction = False) insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection") insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 1 }, transaction = False) insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream") insertStreamDAO.execute(binds = { 'STREAM' : "Express" }, transaction = False) insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "Express", "TestFileset1") insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer") insertStreamerDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 1, 'STREAM' : "Express", 'TIME' : int(time.time()), 'LFN' : "/streamer", 'FILESIZE' : 0, 'EVENTS' : 0 }, transaction = False) insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration") insertPromptCalibrationDAO.execute( { 'RUN' : 1, 'STREAM' : "Express" }, transaction = False) self.fileset1 = Fileset(name = "TestFileset1") self.fileset1.create() workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test") workflow1.create() self.subscription1 = Subscription(fileset = self.fileset1, workflow = workflow1, split_algo = "Condition", type = "Condition") self.subscription1.create() # set parentage chain and sqlite fileset alcaRecoFile = File("/alcareco", size = 0, events = 0) alcaRecoFile.addRun(Run(1, *[1])) alcaRecoFile.setLocation("SomeSE", immediateSave = False) alcaRecoFile.create() alcaPromptFile = File("/alcaprompt", size = 0, events = 0) alcaPromptFile.addRun(Run(1, *[1])) alcaPromptFile.setLocation("SomeSE", immediateSave = False) alcaPromptFile.create() sqliteFile = File("/sqlite", size = 0, events = 0) sqliteFile.create() self.fileset1.addFile(sqliteFile) self.fileset1.commit() results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details """, transaction = False)[0].fetchall() setParentageDAO = wmbsDaoFactory(classname = "Files.SetParentage") setParentageDAO.execute(binds = [ { 'parent' : "/streamer", 'child' : "/alcareco" }, { 'parent' : "/alcareco", 'child' : "/alcaprompt" }, { 'parent' : "/alcaprompt", 'child' : "/sqlite" } ], transaction = False) # default split parameters self.splitArgs = {} self.splitArgs['runNumber'] = 1 self.splitArgs['streamName'] = "Express" return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def isPromptCalibFinished(self): """ _isPromptCalibFinished_ """ myThread = threading.currentThread() result = myThread.dbi.processData("""SELECT finished FROM prompt_calib """, transaction = False)[0].fetchall()[0][0] return result def countPromptCalibFiles(self): """ _deleteSplitLumis_ """ myThread = threading.currentThread() result = myThread.dbi.processData("""SELECT COUNT(*) FROM prompt_calib_file """, transaction = False)[0].fetchall()[0][0] return result def test00(self): """ _test00_ Make sure the job splitter behaves correctly. Just make sure the job splitter does nothing when the fileset is open and populates t0ast data structures when it's closed. In the later case all input files should be marked as acquired without creating a job as well. """ mySplitArgs = self.splitArgs.copy() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1) self.assertEqual(self.isPromptCalibFinished(), 0, "ERROR: prompt_calib should not be finished") self.assertEqual(self.countPromptCalibFiles(), 0, "ERROR: there should be no prompt_calib_file") jobGroups = jobFactory(**mySplitArgs) self.assertEqual(self.isPromptCalibFinished(), 0, "ERROR: prompt_calib should not be finished") self.assertEqual(self.countPromptCalibFiles(), 1, "ERROR: there should be one prompt_calib_file") self.fileset1.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.assertEqual(self.isPromptCalibFinished(), 1, "ERROR: prompt_calib should be finished") self.assertEqual(self.countPromptCalibFiles(), 1, "ERROR: there should be one prompt_calib_file") return