def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()
    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % baseName, size=1000, events=100,
                      locations=set(["T1_US_FNAL_Disk"]))
    parentFile.create()
    for i in range(nFiles):
        newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100,
                       locations="T1_US_FNAL_Disk")
        lumis = []
        for lumi in range(lumisPerFile):
            if rand:
                lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
            else:
                lumis.append((100 * i) + lumi)
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        newFile.addParent(parentFile['lfn'])
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = File(lfn='%s_%i_2' % (baseName, i), size=1000, events=100,
                           locations="T2_CH_CERN")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
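# Illustration only: a hedged sketch of how the subscription returned above is
# typically driven through the splitter.  The SplitterFactory usage mirrors the
# merge tests in this file; `lumis_per_job` is an assumed parameter name for
# the LumiBased algorithm.
def sketchLumiBasedSplitting(self):
    testSubscription = self.createSubscription(nFiles=5, lumisPerFile=2)
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription)
    # One lumi section per job; with 5 files x 2 lumis this would be 10 jobs.
    jobGroups = jobFactory(lumis_per_job=1)
    return jobGroups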
def testLotsOfAncestors(self):
    """
    _testLotsOfAncestors_

    Create a file with 15 parents with each parent having 100 parents to
    verify that the query to return grandparents works correctly.
    """
    raise nose.SkipTest
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     checksums={"cksum": "1"}, locations="se1.fnal.gov")
    testFileA.create()

    for i in xrange(15):
        testParent = File(lfn=makeUUID(), size=1024, events=10,
                          checksums={"cksum": "1"}, locations="se1.fnal.gov")
        testParent.create()
        testFileA.addParent(testParent["lfn"])

        for j in xrange(100):
            testGParent = File(lfn=makeUUID(), size=1024, events=10,
                               checksums={"cksum": "1"}, locations="se1.fnal.gov")
            testGParent.create()
            testParent.addParent(testGParent["lfn"])

    assert len(testFileA.getAncestors(level=2, type="lfn")) == 1500, \
        "ERROR: Incorrect grand parents returned"

    return
def testGetInfo(self):
    """
    _testGetInfo_

    Test the getInfo() method of the File class to make sure that it
    returns the correct information.
    """
    testFileParent = File(lfn="/this/is/a/parent/lfn", size=1024, events=20,
                          checksums={'cksum': 1111})
    testFileParent.addRun(Run(1, *[45]))
    testFileParent.create()

    testFile = File(lfn="/this/is/a/lfn", size=1024, events=10,
                    checksums={'cksum': 222})
    testFile.addRun(Run(1, *[45]))
    testFile.addRun(Run(2, *[46, 47]))
    testFile.addRun(Run(2, *[47, 48]))
    testFile.create()
    testFile.setLocation(se="se1.fnal.gov", immediateSave=False)
    testFile.setLocation(se="se1.cern.ch", immediateSave=False)
    testFile.addParent("/this/is/a/parent/lfn")

    info = testFile.getInfo()

    assert info[0] == testFile["lfn"], \
        "ERROR: File returned wrong LFN"
    assert info[1] == testFile["id"], \
        "ERROR: File returned wrong ID"
    assert info[2] == testFile["size"], \
        "ERROR: File returned wrong size"
    assert info[3] == testFile["events"], \
        "ERROR: File returned wrong events"
    assert info[4] == testFile["checksums"], \
        "ERROR: File returned wrong cksum"
    assert len(info[5]) == 2, \
        "ERROR: File returned wrong runs"
    assert info[5] == [Run(1, *[45]), Run(2, *[46, 47, 48])], \
        "Error: Run hasn't been combined correctly"
    assert len(info[6]) == 2, \
        "ERROR: File returned wrong locations"
    for testLocation in info[6]:
        assert testLocation in ["se1.fnal.gov", "se1.cern.ch"], \
            "ERROR: File returned wrong locations"
    assert len(info[7]) == 1, \
        "ERROR: File returned wrong parents"
    assert info[7][0] == testFileParent, \
        "ERROR: File returned wrong parents"

    testFile.delete()
    testFileParent.delete()
    return
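# Illustration only: lumi sections added for the same run number are expected
# to merge as a set union, which is why Run(2, 46, 47) plus Run(2, 47, 48)
# shows up as Run(2, 46, 47, 48) in the getInfo() assertion above.  A minimal
# pure-Python sketch of that semantics (not the WMCore implementation):
def sketchRunLumiCombination():
    lumisByRun = {}
    for run, lumis in [(1, [45]), (2, [46, 47]), (2, [47, 48])]:
        lumisByRun.setdefault(run, set()).update(lumis)
    assert sorted(lumisByRun[2]) == [46, 47, 48]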
def testGetParentLFNs(self):
    """
    _testGetParentLFNs_

    Create three files and set them to be parents of a fourth file.  Check
    to make sure that getParentLFNs() on the child file returns the correct
    LFNs.
    """
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 2})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentC = File(lfn="/this/is/a/parent/lfnC", size=1024,
                           events=20, checksums={'cksum': 3})
    testFileParentC.addRun(Run(1, *[45]))

    testFileParentA.create()
    testFileParentB.create()
    testFileParentC.create()

    testFile = File(lfn="/this/is/a/lfn", size=1024,
                    events=10, checksums={'cksum': 1})
    testFile.addRun(Run(1, *[45]))
    testFile.create()

    testFile.addParent(testFileParentA["lfn"])
    testFile.addParent(testFileParentB["lfn"])
    testFile.addParent(testFileParentC["lfn"])

    parentLFNs = testFile.getParentLFNs()

    assert len(parentLFNs) == 3, \
        "ERROR: Child does not have the right amount of parents"

    goldenLFNs = ["/this/is/a/parent/lfnA",
                  "/this/is/a/parent/lfnB",
                  "/this/is/a/parent/lfnC"]
    for parentLFN in parentLFNs:
        assert parentLFN in goldenLFNs, \
            "ERROR: Unknown parent lfn"
        goldenLFNs.remove(parentLFN)

    testFile.delete()
    testFileParentA.delete()
    testFileParentB.delete()
    testFileParentC.delete()
    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()
    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % baseName, size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for i in range(nFiles):
        newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100,
                       locations="somese.cern.ch")
        lumis = []
        for lumi in range(lumisPerFile):
            if rand:
                lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
            else:
                lumis.append((100 * i) + lumi)
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        newFile.addParent(parentFile['lfn'])
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = File(lfn='%s_%i_2' % (baseName, i), size=1000, events=100,
                           locations="otherse.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def testLocationMerging(self):
    """
    _testLocationMerging_

    Verify that files residing on different SEs are not merged together in
    the same job.
    """
    self.stuffWMBS()

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="s2", seName="somese3.cern.ch")

    fileSite2 = File(lfn="fileSite2", size=4098, events=1024,
                     first_event=0, locations=set(["somese3.cern.ch"]))
    fileSite2.addRun(Run(1, *[46]))
    fileSite2.create()
    fileSite2.addParent(self.parentFileSite2["lfn"])

    self.mergeFileset.addFile(fileSite2)
    self.mergeFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                        max_merge_events=999999999)

    assert len(result) == 1, \
        "ERROR: More than one JobGroup returned."

    assert len(result[0].jobs) == 2, \
        "ERROR: Two jobs should have been returned."

    for job in result[0].jobs:
        firstInputFile = job.getFiles()[0]
        baseLocation = list(firstInputFile["locations"])[0]

        for inputFile in job.getFiles():
            assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                   inputFile["locations"] == set(["somese3.cern.ch"]), \
                "Error: Wrong number of locations"

            assert list(inputFile["locations"])[0] == baseLocation, \
                "Error: Wrong location."

    return
def testLocationMerging(self):
    """
    _testLocationMerging_

    Verify that files residing on different SEs are not merged together in
    the same job.
    """
    self.stuffWMBS()

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

    fileSite2 = File(lfn="fileSite2", size=4098, events=1024,
                     first_event=0, locations=set(["T1_UK_RAL_Disk"]))
    fileSite2.addRun(Run(1, *[46]))
    fileSite2.create()
    fileSite2.addParent(self.parentFileSite2["lfn"])

    self.mergeFileset.addFile(fileSite2)
    self.mergeFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                        max_merge_events=999999999)

    assert len(result) == 1, \
        "ERROR: More than one JobGroup returned."

    assert len(result[0].jobs) == 2, \
        "ERROR: Two jobs should have been returned."

    ralJobs = 0
    fnalcernJobs = 0
    for job in result[0].jobs:
        if job["possiblePSN"] == set(["T1_UK_RAL"]):
            ralJobs += 1
        elif job["possiblePSN"] == set(["T1_US_FNAL", "T2_CH_CERN"]):
            fnalcernJobs += 1

    self.assertEqual(ralJobs, 1)
    self.assertEqual(fnalcernJobs, 1)

    return
def testLoadData(self):
    """
    _testLoadData_

    Test the loading of all data from a file, including run/lumi
    associations, location information and parentage information.
    """
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024,
                     events=10, checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileA.setLocation(se="se1.fnal.gov", immediateSave=False)
    testFileA.setLocation(se="se1.cern.ch", immediateSave=False)
    testFileA.addParent("/this/is/a/parent/lfnA")
    testFileA.addParent("/this/is/a/parent/lfnB")
    testFileA.updateLocations()

    testFileB = File(lfn=testFileA["lfn"])
    testFileB.loadData(parentage=1)
    testFileC = File(id=testFileA["id"])
    testFileC.loadData(parentage=1)

    assert testFileA == testFileB, \
        "ERROR: File load by LFN didn't work"

    assert testFileA == testFileC, \
        "ERROR: File load by ID didn't work"

    testFileA.delete()
    testFileParentA.delete()
    testFileParentB.delete()
    return
def testCursor(self):
    """
    _testCursor_

    Test that cursors are closed properly: create 100 files with 5 parents
    each and loop over them 100 times.  If the cursors are exhausted this
    will crash.

    TODO: improve for more effective testing.
    """
    raise nose.SkipTest
    fileList = []
    parentFile = None
    for i in range(100):
        testFile = File(lfn="/this/is/a/lfn%s" % i, size=1024, events=10,
                        checksums={"cksum": "1"})
        testFile.addRun(Run(1, *[i]))
        testFile.create()

        for j in range(5):
            parentFile = File(lfn="/this/is/a/lfnP%s" % j, size=1024,
                              events=10, checksums={"cksum": "1"})
            parentFile.addRun(Run(1, *[j]))
            parentFile.create()
            testFile.addParent(parentFile['lfn'])

        fileList.append(testFile)

    for i in range(100):
        # Avoid shadowing the `file` builtin while exercising the cursors.
        for fileObj in fileList:
            fileObj.loadData()
            fileObj.getAncestors(level=2)
            fileObj.getAncestors(level=2, type="lfn")

    return
def testGetAncestorLFNs(self):
    """
    _testGetAncestorLFNs_

    Create a series of files that have several generations of parentage
    information.  Verify that the parentage information is reported
    correctly.
    """
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileB.create()
    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileC.create()
    testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileD.create()
    testFileE = File(lfn="/this/is/a/lfnE", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileE.create()
    testFileF = File(lfn="/this/is/a/lfnF", size=1024, events=10,
                     checksums={"cksum": 1}, locations="se1.fnal.gov")
    testFileF.create()

    testFileA.addParent(lfn="/this/is/a/lfnB")
    testFileA.addParent(lfn="/this/is/a/lfnC")
    testFileB.addParent(lfn="/this/is/a/lfnD")
    testFileC.addParent(lfn="/this/is/a/lfnD")
    testFileD.addParent(lfn="/this/is/a/lfnE")
    testFileD.addParent(lfn="/this/is/a/lfnF")

    level1 = ["/this/is/a/lfnB", "/this/is/a/lfnC"]
    level2 = ["/this/is/a/lfnD"]
    level3 = ["/this/is/a/lfnE", "/this/is/a/lfnF"]
    level4 = level5 = []
    decs2 = ["/this/is/a/lfnA"]

    assert testFileA.getAncestors(level=1, type="lfn") == level1, \
        "ERROR: level 1 test failed"
    assert testFileA.getAncestors(level=2, type="lfn") == level2, \
        "ERROR: level 2 test failed"
    assert testFileA.getAncestors(level=3, type="lfn") == level3, \
        "ERROR: level 3 test failed"
    assert testFileA.getAncestors(level=4, type="lfn") == level4, \
        "ERROR: level 4 test failed"
    assert testFileA.getAncestors(level=5, type="lfn") == level5, \
        "ERROR: level 5 test failed"

    assert testFileD.getDescendants(level=1, type="lfn") == level1, \
        "ERROR: level 1 desc test failed"
    assert testFileD.getDescendants(level=2, type="lfn") == decs2, \
        "ERROR: level 2 desc test failed"
    assert testFileD.getDescendants(level=3, type="lfn") == level4, \
        "ERROR: level 3 desc test failed"

    return
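# Illustration only (not the WMBS query): level-N ancestors amount to repeated
# parent lookups, so level 1 of lfnA is {B, C}, level 2 is {D} and level 3 is
# {E, F}, exactly as asserted above.  A self-contained sketch over a plain dict:
def sketchAncestorLevels():
    parents = {"A": {"B", "C"}, "B": {"D"}, "C": {"D"}, "D": {"E", "F"}}

    def ancestors(name, level):
        current = {name}
        for _ in range(level):
            current = set().union(*[parents.get(n, set()) for n in current])
        return current

    assert ancestors("A", 1) == {"B", "C"}
    assert ancestors("A", 2) == {"D"}
    assert ancestors("A", 3) == {"E", "F"}
    assert ancestors("A", 4) == set()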
def stuffWMBS(self, injected=True):
    """
    _stuffWMBS_

    Insert some dummy jobs, jobgroups, filesets, files and subscriptions
    into WMBS to test job creation.  Three completed job groups each
    containing several files are injected.  Another incomplete job group is
    also injected.  Also files are added to the "Mergeable" subscription as
    well as to the output fileset for their jobgroups.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    # Both PSNs are mapped to the same PNN, so files placed at T2_CH_CERN
    # are runnable at either site.
    locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
    locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    self.mergeFileset = Fileset(name="mergeFileset")
    self.mergeFileset.create()
    self.bogusFileset = Fileset(name="bogusFileset")
    self.bogusFileset.create()

    self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
    self.mergeMergedFileset.create()
    self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
    self.bogusMergedFileset.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                             owner="Steve", task="Test")
    mergeWorkflow.create()
    markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
    markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

    self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
    self.mergeSubscription.create()
    self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                             owner="Steve", task="Test")
    inputWorkflow.create()
    inputWorkflow.addOutput("output", self.mergeFileset,
                            self.mergeMergedFileset)
    inputWorkflow.addOutput("output2", self.bogusFileset,
                            self.bogusMergedFileset)
    bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                  owner="Steve", task="Test")
    bogusInputWorkflow.create()

    inputSubscription = Subscription(fileset=inputFileset,
                                     workflow=inputWorkflow)
    inputSubscription.create()
    bogusInputSubscription = Subscription(fileset=inputFileset,
                                          workflow=bogusInputWorkflow)
    bogusInputSubscription.create()

    parentFile1 = File(lfn="parentFile1")
    parentFile1.create()
    parentFile2 = File(lfn="parentFile2")
    parentFile2.create()
    parentFile3 = File(lfn="parentFile3")
    parentFile3.create()
    parentFile4 = File(lfn="parentFile4")
    parentFile4.create()
    self.parentFileSite2 = File(lfn="parentFileSite2")
    self.parentFileSite2.create()

    jobGroup1 = JobGroup(subscription=inputSubscription)
    jobGroup1.create()
    jobGroup2 = JobGroup(subscription=inputSubscription)
    jobGroup2.create()
    jobGroup3 = JobGroup(subscription=bogusInputSubscription)
    jobGroup3.create()

    testJob1 = Job()
    testJob1.addFile(parentFile1)
    testJob1.create(jobGroup1)
    testJob1["state"] = "cleanout"
    testJob1["oldstate"] = "new"
    testJob1["couch_record"] = "somejive"
    testJob1["retry_count"] = 0
    testJob1["outcome"] = "success"
    testJob1.save()
    changeStateDAO.execute([testJob1])

    testJob1A = Job()
    testJob1A.addFile(parentFile1)
    testJob1A.create(jobGroup3)
    testJob1A["state"] = "cleanout"
    testJob1A["oldstate"] = "new"
    testJob1A["couch_record"] = "somejive"
    testJob1A["retry_count"] = 0
    testJob1A["outcome"] = "failure"
    testJob1A.save()
    changeStateDAO.execute([testJob1A])

    testJob2 = Job()
    testJob2.addFile(parentFile2)
    testJob2.create(jobGroup1)
    testJob2["state"] = "cleanout"
    testJob2["oldstate"] = "new"
    testJob2["couch_record"] = "somejive"
    testJob2["retry_count"] = 0
    testJob2["outcome"] = "success"
    testJob2.save()
    changeStateDAO.execute([testJob2])

    testJob3 = Job()
    testJob3.addFile(parentFile3)
    testJob3.create(jobGroup2)
    testJob3["state"] = "cleanout"
    testJob3["oldstate"] = "new"
    testJob3["couch_record"] = "somejive"
    testJob3["retry_count"] = 0
    testJob3["outcome"] = "success"
    testJob3.save()
    changeStateDAO.execute([testJob3])

    testJob4 = Job()
    testJob4.addFile(parentFile4)
    testJob4.create(jobGroup2)
    testJob4["state"] = "cleanout"
    testJob4["oldstate"] = "new"
    testJob4["couch_record"] = "somejive"
    testJob4["retry_count"] = 0
    testJob4["outcome"] = "failure"
    testJob4.save()
    changeStateDAO.execute([testJob4])

    # We'll simulate a failed split by event job that the merger should
    # ignore.
    parentFile5 = File(lfn="parentFile5")
    parentFile5.create()

    testJob5 = Job()
    testJob5.addFile(parentFile5)
    testJob5.create(jobGroup2)
    testJob5["state"] = "cleanout"
    testJob5["oldstate"] = "new"
    testJob5["couch_record"] = "somejive"
    testJob5["retry_count"] = 0
    testJob5["outcome"] = "success"
    testJob5.save()
    changeStateDAO.execute([testJob5])

    testJob6 = Job()
    testJob6.addFile(parentFile5)
    testJob6.create(jobGroup2)
    testJob6["state"] = "cleanout"
    testJob6["oldstate"] = "new"
    testJob6["couch_record"] = "somejive"
    testJob6["retry_count"] = 0
    testJob6["outcome"] = "failure"
    testJob6.save()
    changeStateDAO.execute([testJob6])

    testJob7 = Job()
    testJob7.addFile(self.parentFileSite2)
    testJob7.create(jobGroup2)
    testJob7["state"] = "cleanout"
    testJob7["oldstate"] = "new"
    testJob7["couch_record"] = "somejive"
    testJob7["retry_count"] = 0
    testJob7["outcome"] = "success"
    testJob7.save()
    changeStateDAO.execute([testJob7])

    badFile1 = File(lfn="badFile1", size=10241024, events=10241024,
                    first_event=0, locations={"T2_CH_CERN"})
    badFile1.addRun(Run(1, *[45]))
    badFile1.create()
    badFile1.addParent(parentFile5["lfn"])

    file1 = File(lfn="file1", size=1024, events=1024,
                 first_event=0, locations={"T2_CH_CERN"})
    file1.addRun(Run(1, *[45]))
    file1.create()
    file1.addParent(parentFile1["lfn"])
    file2 = File(lfn="file2", size=1024, events=1024,
                 first_event=1024, locations={"T2_CH_CERN"})
    file2.addRun(Run(1, *[45]))
    file2.create()
    file2.addParent(parentFile1["lfn"])
    file3 = File(lfn="file3", size=1024, events=1024,
                 first_event=2048, locations={"T2_CH_CERN"})
    file3.addRun(Run(1, *[45]))
    file3.create()
    file3.addParent(parentFile1["lfn"])
    file4 = File(lfn="file4", size=1024, events=1024,
                 first_event=3072, locations={"T2_CH_CERN"})
    file4.addRun(Run(1, *[45]))
    file4.create()
    file4.addParent(parentFile1["lfn"])

    fileA = File(lfn="fileA", size=1024, events=1024,
                 first_event=0, locations={"T2_CH_CERN"})
    fileA.addRun(Run(1, *[46]))
    fileA.create()
    fileA.addParent(parentFile2["lfn"])
    fileB = File(lfn="fileB", size=1024, events=1024,
                 first_event=1024, locations={"T2_CH_CERN"})
    fileB.addRun(Run(1, *[46]))
    fileB.create()
    fileB.addParent(parentFile2["lfn"])
    fileC = File(lfn="fileC", size=1024, events=1024,
                 first_event=2048, locations={"T2_CH_CERN"})
    fileC.addRun(Run(1, *[46]))
    fileC.create()
    fileC.addParent(parentFile2["lfn"])

    fileI = File(lfn="fileI", size=1024, events=1024,
                 first_event=0, locations={"T2_CH_CERN"})
    fileI.addRun(Run(2, *[46]))
    fileI.create()
    fileI.addParent(parentFile3["lfn"])
    fileII = File(lfn="fileII", size=1024, events=1024,
                  first_event=1024, locations={"T2_CH_CERN"})
    fileII.addRun(Run(2, *[46]))
    fileII.create()
    fileII.addParent(parentFile3["lfn"])
    fileIII = File(lfn="fileIII", size=1024, events=1024,
                   first_event=2048, locations={"T2_CH_CERN"})
    fileIII.addRun(Run(2, *[46]))
    fileIII.create()
    fileIII.addParent(parentFile3["lfn"])
    fileIV = File(lfn="fileIV", size=1024, events=1024,
                  first_event=3072, locations={"T2_CH_CERN"})
    fileIV.addRun(Run(2, *[46]))
    fileIV.create()
    fileIV.addParent(parentFile3["lfn"])

    fileX = File(lfn="badFileA", size=1024, events=1024,
                 first_event=0, locations={"T2_CH_CERN"})
    fileX.addRun(Run(1, *[47]))
    fileX.create()
    fileX.addParent(parentFile4["lfn"])
    fileY = File(lfn="badFileB", size=1024, events=1024,
                 first_event=1024, locations={"T2_CH_CERN"})
    fileY.addRun(Run(1, *[47]))
    fileY.create()
    fileY.addParent(parentFile4["lfn"])
    fileZ = File(lfn="badFileC", size=1024, events=1024,
                 first_event=2048, locations={"T2_CH_CERN"})
    fileZ.addRun(Run(1, *[47]))
    fileZ.create()
    fileZ.addParent(parentFile4["lfn"])

    jobGroup1.output.addFile(file1)
    jobGroup1.output.addFile(file2)
    jobGroup1.output.addFile(file3)
    jobGroup1.output.addFile(file4)
    jobGroup1.output.addFile(fileA)
    jobGroup1.output.addFile(fileB)
    jobGroup1.output.addFile(fileC)
    jobGroup1.output.commit()

    jobGroup2.output.addFile(fileI)
    jobGroup2.output.addFile(fileII)
    jobGroup2.output.addFile(fileIII)
    jobGroup2.output.addFile(fileIV)
    jobGroup2.output.addFile(fileX)
    jobGroup2.output.addFile(fileY)
    jobGroup2.output.addFile(fileZ)
    jobGroup2.output.addFile(badFile1)
    jobGroup2.output.commit()

    for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC,
                    fileI, fileII, fileIII, fileIV, fileX, fileY, fileZ,
                    badFile1]:
        self.mergeFileset.addFile(fileObj)
        self.bogusFileset.addFile(fileObj)

    self.mergeFileset.commit()
    self.bogusFileset.commit()

    return
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName="site1", seName="somese.cern.ch")
    locationAction.execute(siteName="site2", seName="otherse.cern.ch")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(i, *[45]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["somese.cern.ch"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["otherse.cern.ch", "somese.cern.ch"]))
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.multipleFileSubscription.create()
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.singleFileSubscription.create()
    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.multipleSiteSubscription.create()
    return
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")
    locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["T1_US_FNAL_Disk"]))
    parentFile.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.addRun(Run(i, *[45]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["T1_US_FNAL_Disk"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"]))
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.multipleFileSubscription.create()
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.singleFileSubscription.create()
    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="FileBased",
        type="Processing")
    self.multipleSiteSubscription.create()

    self.performanceParams = {'timePerEvent': 12,
                              'memoryRequirement': 2300,
                              'sizePerEvent': 400}
    return
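# Illustration only: a hedged sketch of how these fixtures are typically
# exercised.  The SplitterFactory usage mirrors other tests in this file;
# passing the estimates through a `performance` keyword is an assumption here.
def sketchFileBasedSplitting(self):
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)
    # 10 input files split 2 per job would yield 5 jobs.
    jobGroups = jobFactory(files_per_job=2, performance=self.performanceParams)
    return jobGroups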
def setUp(self):
    """
    _setUp_

    Create two subscriptions: One that contains a single file and one that
    contains multiple files.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName='s1', seName="somese.cern.ch")
    locationAction.execute(siteName='s2', seName="otherse.cern.ch")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["somese.cern.ch"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("somese.cern.ch")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for i in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleFileSubscription.create()
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.singleFileSubscription.create()
    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleSiteSubscription.create()
    return
def populateWMBS(self):
    """
    _populateWMBS_

    Create files and subscriptions in WMBS
    """
    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName='s1', seName="somese.cern.ch")
    locationAction.execute(siteName='s2', seName="otherse.cern.ch")
    self.validLocations = ["somese.cern.ch", "otherse.cern.ch"]

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for _ in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["somese.cern.ch"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("somese.cern.ch")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()
    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleFileSubscription.create()
    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.singleFileSubscription.create()
    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleSiteSubscription.create()
    return
def __call__(self, filesets):
    """
    The algorithm itself
    """
    # Update run list
    self.getNewRuns()

    # Do per fileset work, abandon fileset processing on exception
    for fileset in filesets:
        ds = fileset.name
        try:
            # Do per run work
            watchCompleteFiles = []

            for watch in self.watchedRuns:
                # Ensure watcher has dataset listed
                watch.addDatasetOfInterest(ds)

                # Query DBS to find all blocks for this run / dataset
                (files, blocks, fileInfoMap) = \
                    self.dbsHelper.getFileInfo(watch.run, ds)

                # Now determine all required parent blocks
                parentBlocks = set()
                if fileset.requireParents:
                    parentDs = self.dbsHelper.getParentDataset(ds)
                    parentBlocks = self.dbsHelper.getBlockInfo(watch.run,
                                                               parentDs)

                # Final set of all required blocks (a set union, since
                # parentBlocks is a set)
                allBlocks = set(blocks)
                allBlocks.update(parentBlocks)

                # Find all sites where all blocks are complete
                sites = self.phedexHelper.getCompleteSites(blocks)

                # Get sites with newly completed transfers
                newSites = watch.getNewSites(ds, sites)

                if len(newSites) > 0:
                    # Add the files for these blocks to the fileset
                    for lfn in fileInfoMap:
                        fi = fileInfoMap[lfn]

                        # First add parent file
                        if fileset.requireParents:
                            parentFile = File(lfn=fi["file.parent"])
                            parentFile.save()
                            parentFile.setLocation(newSites)

                        # Add actual file
                        fileToAdd = File(lfn=lfn, size=fi["file.size"],
                                         events=fi["file.events"],
                                         run=watch.run,
                                         lumi=fi["file.lumi"])
                        if not fileToAdd.exists() and fileset.requireParents:
                            fileToAdd.addParent(fi["file.parent"])

                        # Add new locations but don't persist immediately
                        fileToAdd.setLocations(newSites, immediateSave=False)

                        # Add the file to the new file list
                        fileset.addFile(fileToAdd)

                # Add the site info to the watcher list
                watchCompleteFiles.append([watch, ds, newSites])

            # Commit the fileset
            fileset.commit()

            # Add the watched runs
            for a in watchCompleteFiles:
                a[0].addCompletedNodes(a[1], a[2])
        except:
            # Reset the watch list so we re-evaluate next call
            watchCompleteFiles = []

    # Purge old runs
    self.purgeWatchedRuns()
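# Illustration only: the site selection above boils down to intersecting
# per-block replica sets, so a site is "complete" when it holds every block.
# A minimal pure-Python sketch with hypothetical data (not the PhEDEx helper's
# actual implementation):
def sketchCompleteSites():
    blockSites = {"block1": {"T1_US_FNAL", "T2_CH_CERN"},
                  "block2": {"T2_CH_CERN"}}
    completeSites = set.intersection(*blockSites.values())
    assert completeSites == {"T2_CH_CERN"}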
def testParallelProcessing(self):
    """
    _testParallelProcessing_

    Verify that merging works correctly when multiple processing
    subscriptions are run over the same input files.  The merging algorithm
    should ignore processing jobs that feed into different merge
    subscriptions.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="s1", seName="somese.cern.ch")

    mergeFilesetA = Fileset(name="mergeFilesetA")
    mergeFilesetB = Fileset(name="mergeFilesetB")
    mergeFilesetA.create()
    mergeFilesetB.create()

    mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
    mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
    mergeMergedFilesetA.create()
    mergeMergedFilesetB.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                             owner="Steve", task="Test")
    mergeWorkflow.create()

    mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionA.create()
    mergeSubscriptionB.create()

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputFileA = File(lfn="inputLFNA")
    inputFileB = File(lfn="inputLFNB")
    inputFileA.create()
    inputFileB.create()

    procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                             owner="Steve", task="Test")
    procWorkflowA.create()
    procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
    procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                             owner="Steve", task="Test2")
    procWorkflowB.create()
    procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

    procSubscriptionA = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowA,
                                     split_algo="EventBased")
    procSubscriptionA.create()
    procSubscriptionB = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowB,
                                     split_algo="EventBased")
    procSubscriptionB.create()

    jobGroupA = JobGroup(subscription=procSubscriptionA)
    jobGroupA.create()
    jobGroupB = JobGroup(subscription=procSubscriptionB)
    jobGroupB.create()

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    testJobA = Job()
    testJobA.addFile(inputFileA)
    testJobA.create(jobGroupA)
    testJobA["state"] = "cleanout"
    testJobA["oldstate"] = "new"
    testJobA["couch_record"] = "somejive"
    testJobA["retry_count"] = 0
    testJobA["outcome"] = "success"
    testJobA.save()

    testJobB = Job()
    testJobB.addFile(inputFileB)
    testJobB.create(jobGroupA)
    testJobB["state"] = "cleanout"
    testJobB["oldstate"] = "new"
    testJobB["couch_record"] = "somejive"
    testJobB["retry_count"] = 0
    testJobB["outcome"] = "success"
    testJobB.save()

    testJobC = Job()
    testJobC.addFile(inputFileA)
    testJobC.create(jobGroupB)
    testJobC["state"] = "cleanout"
    testJobC["oldstate"] = "new"
    testJobC["couch_record"] = "somejive"
    testJobC["retry_count"] = 0
    testJobC["outcome"] = "success"
    testJobC.save()

    testJobD = Job()
    testJobD.addFile(inputFileA)
    testJobD.create(jobGroupB)
    testJobD["state"] = "cleanout"
    testJobD["oldstate"] = "new"
    testJobD["couch_record"] = "somejive"
    testJobD["retry_count"] = 0
    testJobD["outcome"] = "failure"
    testJobD.save()

    testJobE = Job()
    testJobE.addFile(inputFileB)
    testJobE.create(jobGroupB)
    testJobE["state"] = "cleanout"
    testJobE["oldstate"] = "new"
    testJobE["couch_record"] = "somejive"
    testJobE["retry_count"] = 0
    testJobE["outcome"] = "success"
    testJobE.save()

    testJobF = Job()
    testJobF.addFile(inputFileB)
    testJobF.create(jobGroupB)
    testJobF["state"] = "cleanout"
    testJobF["oldstate"] = "new"
    testJobF["couch_record"] = "somejive"
    testJobF["retry_count"] = 0
    testJobF["outcome"] = "failure"
    testJobF.save()

    changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                            testJobE, testJobF])

    fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileA.addParent(inputFileA["lfn"])
    fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileB.addRun(Run(1, *[45]))
    fileB.create()
    fileB.addParent(inputFileB["lfn"])

    jobGroupA.output.addFile(fileA)
    jobGroupA.output.addFile(fileB)
    jobGroupA.output.commit()

    mergeFilesetA.addFile(fileA)
    mergeFilesetA.addFile(fileB)
    mergeFilesetA.commit()

    fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileC.addRun(Run(1, *[45]))
    fileC.create()
    fileC.addParent(inputFileA["lfn"])
    fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileD.addRun(Run(1, *[45]))
    fileD.create()
    fileD.addParent(inputFileB["lfn"])

    jobGroupB.output.addFile(fileC)
    jobGroupB.output.addFile(fileD)

    mergeFilesetB.addFile(fileC)
    mergeFilesetB.addFile(fileD)
    mergeFilesetB.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=mergeSubscriptionB)

    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 0, \
        "Error: No merge jobs should have been created."

    fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileE.addRun(Run(1, *[45]))
    fileE.create()
    fileE.addParent(inputFileA["lfn"])
    fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                 locations=set(["somese.cern.ch"]))
    fileF.addRun(Run(1, *[45]))
    fileF.create()
    fileF.addParent(inputFileB["lfn"])

    jobGroupB.output.addFile(fileE)
    jobGroupB.output.addFile(fileF)

    mergeFilesetB.addFile(fileE)
    mergeFilesetB.addFile(fileF)
    mergeFilesetB.commit()

    testJobD["outcome"] = "success"
    testJobD.save()
    testJobF["outcome"] = "success"
    testJobF.save()

    changeStateDAO.execute([testJobD, testJobF])

    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 1, \
        "Error: One merge job should have been created: %s" % len(result)

    return
def createTestJobGroup(self, nJobs=10, retry_count=0, workloadPath='test'):
    """
    Creates a group of several jobs
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()
    testWorkflow = Workflow(spec=workloadPath, owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFile0 = File(lfn="/this/is/a/parent", size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('malpaquet')

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    testFileB.setLocation('malpaquet')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn="/this/is/a/parent")
    testFileB.addParent(lfn="/this/is/a/parent")

    for _ in range(0, nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['group'] = 'BadGuys'
        testJob['user'] = '******'
        testJob['taskType'] = 'Merge'
        #testJob['fwjr'] = myReport
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
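# A brief usage sketch, grounded in the helper above: the assertions simply
# restate what createTestJobGroup constructs.
def sketchCreateTestJobGroupUsage(self):
    testJobGroup = self.createTestJobGroup(nJobs=10, retry_count=1)
    self.assertEqual(len(testJobGroup.jobs), 10)
    for job in testJobGroup.jobs:
        self.assertEqual(job['retry_count'], 1)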
def stuffWMBS(self, injected=True):
    """
    _stuffWMBS_

    Insert dummy jobs, job groups, filesets, files and subscriptions into
    WMBS to test job creation.  Three completed job groups, each containing
    several files, are injected along with one incomplete job group.  Files
    are added to the mergeable subscription's fileset as well as to the
    output fileset of their job group.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="s1", seName="somese.cern.ch")

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    self.mergeFileset = Fileset(name="mergeFileset")
    self.mergeFileset.create()
    self.bogusFileset = Fileset(name="bogusFileset")
    self.bogusFileset.create()

    self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
    self.mergeMergedFileset.create()
    self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
    self.bogusMergedFileset.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                             owner="Steve", task="Test")
    mergeWorkflow.create()
    markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
    markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

    self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
    self.mergeSubscription.create()
    self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
    # The bogus subscription was never persisted in the original; create it
    # so it can be referenced later like the merge subscription.
    self.bogusSubscription.create()

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                             owner="Steve", task="Test")
    inputWorkflow.create()
    inputWorkflow.addOutput("output", self.mergeFileset,
                            self.mergeMergedFileset)
    inputWorkflow.addOutput("output2", self.bogusFileset,
                            self.bogusMergedFileset)

    bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                  owner="Steve", task="Test")
    bogusInputWorkflow.create()

    inputSubscription = Subscription(fileset=inputFileset,
                                     workflow=inputWorkflow)
    inputSubscription.create()
    bogusInputSubscription = Subscription(fileset=inputFileset,
                                          workflow=bogusInputWorkflow)
    bogusInputSubscription.create()

    parentFile1 = File(lfn="parentFile1")
    parentFile1.create()
    parentFile2 = File(lfn="parentFile2")
    parentFile2.create()
    parentFile3 = File(lfn="parentFile3")
    parentFile3.create()
    parentFile4 = File(lfn="parentFile4")
    parentFile4.create()
    self.parentFileSite2 = File(lfn="parentFileSite2")
    self.parentFileSite2.create()

    jobGroup1 = JobGroup(subscription=inputSubscription)
    jobGroup1.create()
    jobGroup2 = JobGroup(subscription=inputSubscription)
    jobGroup2.create()
    jobGroup3 = JobGroup(subscription=bogusInputSubscription)
    jobGroup3.create()

    def createCompletedJob(jobGroup, inputFile, outcome):
        """Create a job in the cleanout state with the given outcome."""
        job = Job()
        job.addFile(inputFile)
        job.create(jobGroup)
        job["state"] = "cleanout"
        job["oldstate"] = "new"
        job["couch_record"] = "somejive"
        job["retry_count"] = 0
        job["outcome"] = outcome
        job.save()
        changeStateDAO.execute([job])
        return job

    createCompletedJob(jobGroup1, parentFile1, "success")
    createCompletedJob(jobGroup3, parentFile1, "failure")
    createCompletedJob(jobGroup1, parentFile2, "success")
    createCompletedJob(jobGroup2, parentFile3, "success")
    createCompletedJob(jobGroup2, parentFile4, "failure")

    # We'll simulate a failed split-by-event job that the merger should
    # ignore: the same input file is processed by both a successful and a
    # failed job.
    parentFile5 = File(lfn="parentFile5")
    parentFile5.create()
    createCompletedJob(jobGroup2, parentFile5, "success")
    createCompletedJob(jobGroup2, parentFile5, "failure")

    createCompletedJob(jobGroup2, self.parentFileSite2, "success")

    badFile1 = File(lfn="badFile1", size=10241024, events=10241024,
                    first_event=0, locations=set(["somese.cern.ch"]))
    badFile1.addRun(Run(1, *[45]))
    badFile1.create()
    badFile1.addParent(parentFile5["lfn"])

    def createOutputFile(lfn, firstEvent, run, lumi, parent):
        """Create a 1024-event output file parented to the given file."""
        newFile = File(lfn=lfn, size=1024, events=1024,
                       first_event=firstEvent,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(run, *[lumi]))
        newFile.create()
        newFile.addParent(parent["lfn"])
        return newFile

    # Output of the job over parentFile1 (run 1, lumi 45).
    group1Files = [createOutputFile("file%d" % (i + 1), 1024 * i, 1, 45,
                                    parentFile1) for i in range(4)]

    # Output of the job over parentFile2 (run 1, lumi 46).
    groupAFiles = [createOutputFile("file%s" % letter, 1024 * i, 1, 46,
                                    parentFile2)
                   for i, letter in enumerate(["A", "B", "C"])]

    # Output of the job over parentFile3 (run 2, lumi 46).
    groupIFiles = [createOutputFile("file%s" % numeral, 1024 * i, 2, 46,
                                    parentFile3)
                   for i, numeral in enumerate(["I", "II", "III", "IV"])]

    # Output of the failed job over parentFile4 (run 1, lumi 47).
    badFiles = [createOutputFile("badFile%s" % letter, 1024 * i, 1, 47,
                                 parentFile4)
                for i, letter in enumerate(["A", "B", "C"])]

    for fileObj in group1Files + groupAFiles:
        jobGroup1.output.addFile(fileObj)
    jobGroup1.output.commit()

    for fileObj in groupIFiles + badFiles + [badFile1]:
        jobGroup2.output.addFile(fileObj)
    jobGroup2.output.commit()

    for fileObj in group1Files + groupAFiles + groupIFiles + badFiles + \
            [badFile1]:
        self.mergeFileset.addFile(fileObj)
        self.bogusFileset.addFile(fileObj)

    self.mergeFileset.commit()
    self.bogusFileset.commit()

    return
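# A minimal sketch of a test built on the stuffWMBS() fixture, following
# the SplitterFactory pattern used elsewhere in this suite; the threshold
# values and the method name are illustrative, not taken from an existing
# test.
def testStuffWMBSSketch(self):
    """Drive the WMBSMergeBySize splitter against the fixture."""
    self.stuffWMBS()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    # With min_merge_size far above the total input size and the fileset
    # still open, no merge jobs should be produced yet.
    result = jobFactory(min_merge_size=200000, max_merge_size=2000000000,
                        max_merge_events=200000000)
    assert len(result) == 0, \
        "Error: No merge jobs should have been created."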
def __call__(self, filesets):
    """
    The algorithm itself
    """
    # Update run list
    self.getNewRuns()

    # Do per fileset work, abandon fileset processing on exception
    for fileset in filesets:
        ds = fileset.name
        try:
            # Do per run work
            watchCompleteFiles = []

            for watch in self.watchedRuns:
                # Ensure watcher has dataset listed
                watch.addDatasetOfInterest(ds)

                # Query DBS to find all blocks for this run / dataset
                (files, blocks, fileInfoMap) = \
                    self.dbsHelper.getFileInfo(watch.run, ds)

                # Now determine all required parent blocks
                parentBlocks = set()
                if fileset.requireParents:
                    parentDs = self.dbsHelper.getParentDataset(ds)
                    parentBlocks = self.dbsHelper.getBlockInfo(watch.run,
                                                               parentDs)

                # Final set of all required blocks; blocks is a list, so
                # build a set before merging in the parent blocks
                allBlocks = set(blocks)
                allBlocks.update(parentBlocks)

                # Find all sites where all required blocks are complete
                sites = self.phedexHelper.getCompleteSites(allBlocks)

                # Get sites with newly completed transfers
                newSites = watch.getNewSites(ds, sites)

                if len(newSites) > 0:
                    # Add the files for these blocks to the fileset
                    for lfn in fileInfoMap:
                        fi = fileInfoMap[lfn]

                        # First add parent file
                        if fileset.requireParents:
                            parentFile = File(lfn=fi["file.parent"])
                            parentFile.save()
                            parentFile.setLocation(newSites)

                        # Add actual file
                        fileToAdd = File(lfn=lfn, size=fi["file.size"],
                                         events=fi["file.events"],
                                         run=watch.run,
                                         lumi=fi["file.lumi"])
                        if not fileToAdd.exists() and fileset.requireParents:
                            fileToAdd.addParent(fi["file.parent"])

                        # Add new locations but don't persist immediately
                        fileToAdd.setLocation(newSites, immediateSave=False)

                        # Add the file to the new file list
                        fileset.addFile(fileToAdd)

                    # Add the site info to the watcher list
                    watchCompleteFiles.append([watch, ds, newSites])

            # Commit the fileset
            fileset.commit()

            # Add the watched runs
            for a in watchCompleteFiles:
                a[0].addCompletedNodes(a[1], a[2])
        except Exception:
            # Reset the watch list so we re-evaluate next call
            watchCompleteFiles = []

    # Purge old runs
    self.purgeWatchedRuns()
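# Hypothetical driver for the feeder above, for illustration only: the
# class name "RunTransferFeeder" and its constructor arguments are
# assumptions rather than actual WMCore names, and filesets are assumed to
# carry the requireParents flag the algorithm reads.
feeder = RunTransferFeeder(dbsUrl="https://cmsweb.cern.ch/dbs",
                           phedexUrl="https://cmsweb.cern.ch/phedex")

recoFileset = Fileset(name="/SomePrimary/SomeProcessed/RECO")
recoFileset.requireParents = True

# Each call refreshes the watched run list, adds any files whose blocks
# (and parent blocks) are now complete at some site, and commits the
# fileset; a failure in one fileset does not abort processing the others.
feeder([recoFileset])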
def populateWMBS(self):
    """
    _populateWMBS_

    Create files and subscriptions in WMBS.
    """
    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName='s1', seName="somese.cern.ch")
    locationAction.execute(siteName='s2', seName="otherse.cern.ch")
    self.validLocations = ["somese.cern.ch", "otherse.cern.ch"]

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()

    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for _ in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["somese.cern.ch"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("somese.cern.ch")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["somese.cern.ch", "otherse.cern.ch"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleFileSubscription.create()

    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.singleFileSubscription.create()

    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="EventBased",
        type="Processing")
    self.multipleSiteSubscription.create()
    return
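# A minimal sketch of a test consuming populateWMBS(), again following the
# SplitterFactory pattern used elsewhere in this suite; the method name is
# illustrative.  The expected counts follow from the fixture: ten
# 100-event files split at 50 events per job gives two jobs per file.
def testEventBasedSketch(self):
    """Split the multiple-file subscription into 50-event jobs."""
    self.populateWMBS()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.multipleFileSubscription)

    jobGroups = jobFactory(events_per_job=50)

    assert len(jobGroups) == 1, \
        "Error: JobFactory didn't return one JobGroup."
    assert len(jobGroups[0].jobs) == 20, \
        "Error: JobFactory didn't create twenty jobs."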
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test',
                       fwjrPath=None, workloadName=None):
    """
    Creates a group of several jobs
    """
    # Default arguments are evaluated once, at definition time, so a
    # makeUUID() default would give every call the same workload name.
    if workloadName is None:
        workloadName = makeUUID()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops",
                            group="cmsdataops", name=workloadName,
                            task="/TestWorkload/ReReco")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFile0 = File(lfn="/this/is/a/parent", size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('malpaquet')

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     first_event=88, last_event=99)
    testFileA.addRun(Run(10, *[12312, 12313]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10,
                     first_event=88, last_event=99)
    testFileB.addRun(Run(10, *[12314, 12315, 12316]))
    testFileB.setLocation('malpaquet')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn="/this/is/a/parent")
    testFileB.addParent(lfn="/this/is/a/parent")

    for _ in range(nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
        testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
        testJob['mask']['FirstEvent'] = 100
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        testJob['fwjr_path'] = fwjrPath
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test',
                       fwjrPath=None, workloadName=None, fileModifier=''):
    """
    Creates a group of several jobs
    """
    # As above, avoid a call-time makeUUID() default, which would be
    # evaluated only once at function definition.
    if workloadName is None:
        workloadName = makeUUID()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops",
                            group="cmsdataops", name=workloadName,
                            task="/TestWorkload/ReReco")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier,
                     size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('T2_CH_CERN')

    testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024,
                     events=10, first_event=88, merged=False)
    testFileA.addRun(Run(10, *[12312, 12313]))
    testFileA.setLocation('T2_CH_CERN')

    testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024,
                     events=10, first_event=88, merged=False)
    testFileB.addRun(Run(10, *[12314, 12315, 12316]))
    testFileB.setLocation('T2_CH_CERN')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
    testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

    for _ in range(nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
        testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        testJob['fwjr_path'] = fwjrPath
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
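# A short sketch of why the fileModifier argument exists: it lets a test
# create a second job group in the same schema without colliding with the
# first group's LFNs.  The method name below is illustrative.
def testTwoJobGroupsSketch(self):
    """Create two job groups whose files do not collide."""
    testJobGroupA = self.createTestJobGroup(workloadName=makeUUID())
    testJobGroupB = self.createTestJobGroup(workloadName=makeUUID(),
                                            fileModifier='2')

    assert testJobGroupA.id != testJobGroupB.id, \
        "Error: Expected two distinct job groups."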
def testParallelProcessing(self):
    """
    _testParallelProcessing_

    Verify that merging works correctly when multiple processing
    subscriptions are run over the same input files.  The merging
    algorithm should ignore processing jobs that feed into different
    merge subscriptions.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
    locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

    mergeFilesetA = Fileset(name="mergeFilesetA")
    mergeFilesetB = Fileset(name="mergeFilesetB")
    mergeFilesetA.create()
    mergeFilesetB.create()

    mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
    mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
    mergeMergedFilesetA.create()
    mergeMergedFilesetB.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                             owner="Steve", task="Test")
    mergeWorkflow.create()

    mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionA.create()
    mergeSubscriptionB.create()

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputFileA = File(lfn="inputLFNA")
    inputFileB = File(lfn="inputLFNB")
    inputFileA.create()
    inputFileB.create()

    procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                             owner="Steve", task="Test")
    procWorkflowA.create()
    procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)

    procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                             owner="Steve", task="Test2")
    procWorkflowB.create()
    procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

    procSubscriptionA = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowA,
                                     split_algo="EventBased")
    procSubscriptionA.create()
    procSubscriptionB = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowB,
                                     split_algo="EventBased")
    procSubscriptionB.create()

    jobGroupA = JobGroup(subscription=procSubscriptionA)
    jobGroupA.create()
    jobGroupB = JobGroup(subscription=procSubscriptionB)
    jobGroupB.create()

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    def createProcJob(jobGroup, inputFile, outcome):
        """Create a processing job in the cleanout state."""
        job = Job()
        job.addFile(inputFile)
        job.create(jobGroup)
        job["state"] = "cleanout"
        job["oldstate"] = "new"
        job["couch_record"] = "somejive"
        job["retry_count"] = 0
        job["outcome"] = outcome
        job.save()
        return job

    testJobA = createProcJob(jobGroupA, inputFileA, "success")
    testJobB = createProcJob(jobGroupA, inputFileB, "success")
    testJobC = createProcJob(jobGroupB, inputFileA, "success")
    testJobD = createProcJob(jobGroupB, inputFileA, "failure")
    testJobE = createProcJob(jobGroupB, inputFileB, "success")
    testJobF = createProcJob(jobGroupB, inputFileB, "failure")

    changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                            testJobE, testJobF])

    def createOutputFile(lfn, parent):
        """Create a 1024-event output file located at T2_CH_CERN."""
        newFile = File(lfn=lfn, size=1024, events=1024, first_event=0,
                       locations={"T2_CH_CERN"})
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        newFile.addParent(parent["lfn"])
        return newFile

    fileA = createOutputFile("fileA", inputFileA)
    fileB = createOutputFile("fileB", inputFileB)

    jobGroupA.output.addFile(fileA)
    jobGroupA.output.addFile(fileB)
    jobGroupA.output.commit()

    mergeFilesetA.addFile(fileA)
    mergeFilesetA.addFile(fileB)
    mergeFilesetA.commit()

    fileC = createOutputFile("fileC", inputFileA)
    fileD = createOutputFile("fileD", inputFileB)

    jobGroupB.output.addFile(fileC)
    jobGroupB.output.addFile(fileD)
    # Persist jobGroupB's output fileset, mirroring jobGroupA above.
    jobGroupB.output.commit()

    mergeFilesetB.addFile(fileC)
    mergeFilesetB.addFile(fileD)
    mergeFilesetB.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=mergeSubscriptionB)
    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 0, \
        "Error: No merge jobs should have been created."

    fileE = createOutputFile("fileE", inputFileA)
    fileF = createOutputFile("fileF", inputFileB)

    jobGroupB.output.addFile(fileE)
    jobGroupB.output.addFile(fileF)
    jobGroupB.output.commit()

    mergeFilesetB.addFile(fileE)
    mergeFilesetB.addFile(fileF)
    mergeFilesetB.commit()

    testJobD["outcome"] = "success"
    testJobD.save()
    testJobF["outcome"] = "success"
    testJobF.save()

    changeStateDAO.execute([testJobD, testJobF])

    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 1, \
        "Error: One merge job should have been created: %s" % len(result)

    return