def testCreateDeleteExistsNoFiles(self):
    """
    _testCreateDeleteExistsNoFiles_

    Create and then delete a job but don't add any input files to it.
    Use the job class's exists() method to determine if the job has been
    written to the database before it is created, after it has been
    created and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job(name="TestJob")

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id on success and False otherwise; the
    # original ">= 0" check also passes for False, so assert truthiness.
    assert testJob.exists(), \
        "ERROR: Job does not exist after it was created"

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    return
def createTestJob(subscriptionType="Merge"):
    """
    _createTestJob_

    Create a test job with two files as input.  This will also create the
    appropriate workflow, jobgroup and subscription.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Simon",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=subscriptionType)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[46]))
    testFileA.create()
    testFileB.create()

    testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
    testJob["couch_record"] = "somecouchrecord"
    testJob["location"] = "test.site.ch"
    testJob.create(group=testJobGroup)
    testJob.associateFiles()

    return testJob
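# Usage sketch (hypothetical, not part of the original suite): a test method
# built on the createTestJob() helper above.  It only uses calls already
# demonstrated in this file and assumes the usual WMBS test harness
# (schema installed, unittest.TestCase assert helpers available).
def testCreateTestJobHelper(self):
    """Round-trip a job produced by the createTestJob() helper."""
    testJob = createTestJob(subscriptionType="Processing")
    self.assertTrue(testJob.exists(), "Helper job was not persisted")
    self.assertEqual(testJob["location"], "test.site.ch")
    testJob.delete()
    self.assertFalse(testJob.exists(), "Helper job survived delete()")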
def testMask(self):
    """
    _testMask_

    Test the new mask setup
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job()
    testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
    testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
    testJob.create(group=testJobGroup)

    loadJob = Job(id=testJob.exists())
    loadJob.loadData()

    runs = loadJob['mask'].getRunAndLumis()
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[100], [[101, 102]])
    self.assertEqual(runs[200], [[201, 202]])

    bigRun = Run(100, *[101, 102, 103, 104])
    badRun = Run(300, *[1001, 1002])
    result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(result), 1)
    alteredRun = result.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    run0 = Run(300, *[1001, 1002])
    run1 = Run(300, *[1001, 1002])
    loadJob['mask'].filterRunLumisByMask([run0, run1])

    return
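# Illustrative sketch (hypothetical, not from the original suite):
# filterRunLumisByMask() keeps only runs that overlap the mask and trims
# their lumi lists to the masked lumis, as the assertions above show.
# This assumes Mask is importable from WMCore.DataStructs.Mask and can be
# exercised standalone, without the WMBS database round-trip.
def testMaskFilterStandalone(self):
    """Filter a Run list through a Mask without touching WMBS."""
    from WMCore.DataStructs.Mask import Mask  # assumed import path

    mask = Mask()
    mask.addRunAndLumis(run=100, lumis=[101, 102])

    # Lumi 103 is outside the mask, run 300 is not in the mask at all.
    result = mask.filterRunLumisByMask([Run(100, *[101, 103]),
                                        Run(300, *[1001])])
    self.assertEqual(len(result), 1)
    kept = result.pop()
    self.assertEqual(kept.run, 100)
    self.assertEqual(kept.lumis, [101])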
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create and then delete a job.  Use the job class's exists() method to
    determine if the job has been written to the database before it is
    created, after it has been created and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])
    testWU = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'],
                      runLumi=Run(1, *[44]))

    self.assertFalse(testJob.exists(), "Job exists before it was created")
    self.assertFalse(testWU.exists(), "WorkUnit exists before it was created")

    testJob.create(group=testJobGroup)
    self.assertTrue(testJob.exists(), "Job does not exist after it was created")
    self.assertFalse(testWU.exists(), "WorkUnit exists when there is no work")

    # Test the getWorkflow method
    workflow = testJob.getWorkflow()
    self.assertEqual(workflow['task'], 'Test')
    self.assertEqual(workflow['name'], 'wf001')

    testJob.delete()
    self.assertFalse(testJob.exists(), "Job exists after it was deleted")
    self.assertFalse(testWU.exists(), "WorkUnit exists after job is deleted")

    return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub,
              site=None, bl=None, wl=None):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons.
    """
    # Avoid mutable default arguments (the original used bl=[], wl=[]).
    bl = bl or []
    wl = wl or []

    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, 'CacheDir')

    for n in range(nJobs):
        # First make a file
        # site = self.sites[0]
        testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                        size=1024, events=10)
        if site:
            testFile.setLocation(site)
        else:
            for tmpSite in self.sites:
                testFile.setLocation('se.%s' % (tmpSite))
        testFile.create()
        fileset.addFile(testFile)

    fileset.commit()

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name='%s-%i' % (name, index))
        testJob.addFile(f)
        testJob["location"] = f.getLocations()[0]
        testJob['custom']['location'] = f.getLocations()[0]
        testJob['task'] = task.getPathName()
        testJob['sandbox'] = task.data.input.sandbox
        testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
        testJob['mask']['FirstEvent'] = 101
        testJob['owner'] = 'tapas'
        testJob["siteBlacklist"] = bl
        testJob["siteWhitelist"] = wl
        testJob['ownerDN'] = 'tapas'
        testJob['ownerRole'] = 'cmsrole'
        testJob['ownerGroup'] = 'phgroup'
        jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob['cache_dir'] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # Pickle needs a binary stream; the original opened the file in
        # text mode ('w'), which fails under Python 3.
        with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Create a new job and commit it to the database.  Start a new
    transaction and delete the job from the database.  Verify that the
    job has been deleted.  After that, roll back the transaction and
    verify that the job is once again in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id or False; ">= 0" would also pass for
    # False, so assert truthiness instead.
    assert testJob.exists(), \
        "ERROR: Job does not exist after it was created"

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    myThread.transaction.rollback()

    assert testJob.exists(), \
        "ERROR: Job does not exist after transaction was rolled back."

    return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons.
    """
    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, 'CacheDir')

    for n in range(nJobs):
        # First make a file
        # site = self.sites[0]
        testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                        size=1024, events=10)
        fileset.addFile(testFile)

    fileset.commit()

    location = None
    if isinstance(site, list):
        if len(site) > 0:
            location = site[0]
    else:
        location = site

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name='%s-%i' % (name, index))
        testJob.addFile(f)
        testJob["location"] = location
        testJob["possiblePSN"] = set(site) if isinstance(site, list) else set([site])
        testJob['task'] = task.getPathName()
        testJob['sandbox'] = task.data.input.sandbox
        testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
        testJob['mask']['FirstEvent'] = 101
        testJob['priority'] = 101
        testJob['numberOfCores'] = 1
        jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob['cache_dir'] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # Binary mode for pickle (the original used text mode 'w').
        with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def testParentageByJob(self):
    """
    _testParentageByJob_

    Tests the DAO that assigns parentage by Job
    """
    testWorkflow = Workflow(spec='hello', owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.addFile(testFileParentA)
    testJobA.addFile(testFileParentB)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.SetParentageByJob")
    parentAction.execute(binds={'jobid': testJobA.exists(),
                                'child': testFileA['lfn']})

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        self.assertEqual(parentFile in goldenFiles, True,
                         "ERROR: Unknown parent file")
        goldenFiles.remove(parentFile)

    self.assertEqual(len(goldenFiles), 0,
                     "ERROR: Some parents are missing")
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site,
              bl=None, wl=None):
    """
    _makeNJobs_

    Make and return a WMBS Job and File.
    This handles all those damn add-ons.
    """
    # Avoid mutable default arguments (the original used bl=[], wl=[]).
    bl = bl or []
    wl = wl or []

    # Set the CacheDir
    cacheDir = os.path.join(self.testDir, 'CacheDir')

    for n in range(nJobs):
        # First make a file
        # site = self.sites[0]
        testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                        size=1024, events=10)
        # isinstance() instead of the original "type(site) == list"
        if isinstance(site, list):
            for singleSite in site:
                testFile.setLocation(singleSite)
        else:
            testFile.setLocation(site)
        testFile.create()
        fileset.addFile(testFile)

    fileset.commit()

    index = 0
    for f in fileset.files:
        index += 1
        testJob = Job(name='%s-%i' % (name, index))
        testJob.addFile(f)
        testJob["location"] = f.getLocations()[0]
        testJob['task'] = task.getPathName()
        testJob['sandbox'] = task.data.input.sandbox
        testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
        testJob['mask']['FirstEvent'] = 101
        testJob["siteBlacklist"] = bl
        testJob["siteWhitelist"] = wl
        testJob['priority'] = 101
        testJob['multicoreEnabled'] = False
        testJob['numberOfCores'] = 1
        jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
        os.makedirs(jobCache)
        testJob.create(jobGroup)
        testJob['cache_dir'] = jobCache
        testJob.save()
        jobGroup.add(testJob)
        # Binary mode for pickle (the original used text mode 'w').
        with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
            pickle.dump(testJob, output)

    return testJob, testFile
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create several jobs
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Create a file
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')
    testFileA.create()

    baseName = makeUUID()

    # Now create a job
    for i in range(nJobs):
        testJob = Job(name='%s-%i' % (baseName, i))
        testJob.addFile(testFileA)
        testJob['location'] = 'malpaquet'
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob.create(testJobGroup)
        testJob.save()
        testJobGroup.add(testJob)

    testJobGroup.commit()

    # Set test job caches
    for job in testJobGroup.jobs:
        job.setCache(cacheDir)

    return testJobGroup
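# Usage sketch (hypothetical, not part of the original suite): consuming the
# createTestJobs() helper above.  It relies only on attributes the
# surrounding tests already use (JobGroup.jobs, Job.exists(), self.testDir
# from the test harness).
def testCreateTestJobsHelper(self):
    """The helper should hand back a committed group of nJobs jobs."""
    cacheDir = os.path.join(self.testDir, 'CacheDir')
    testJobGroup = self.createTestJobs(nJobs=3, cacheDir=cacheDir)
    self.assertEqual(len(testJobGroup.jobs), 3)
    for job in testJobGroup.jobs:
        self.assertTrue(job.exists(), "Helper job was not persisted")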
def test_AutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Test and see whether we can find and set the auto_increment values
    """
    myThread = threading.currentThread()
    if not myThread.dialect.lower() == "mysql":
        return

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
    incrementDAO.execute()

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 1)

    incrementDAO.execute()
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 2)

    incrementDAO.execute(input=10)
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 11)

    incrementDAO.execute(input=5)
    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 12)

    return
def notestCreateDeleteExists(self):
    """
    Create and then delete a job and workflow.  Use the workunit class's
    exists() method to determine if the workunit has been written to the
    database before the job is created, after the job has been created,
    and after the workflow has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[46]))
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])
    testWU1 = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'],
                       runLumi=Run(1, *[45]))
    testWU2 = WorkUnit(taskID=testWorkflow.id, fileid=testFileB['id'],
                       runLumi=Run(1, *[46]))

    self.assertFalse(testWU1.exists(), "WorkUnit exists before job was created")
    self.assertFalse(testWU2.exists(), "WorkUnit exists before job was created")

    testJob.create(group=testJobGroup)

    self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job was created")
    self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job was created")

    testJob.delete()

    self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job is deleted")
    self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job is deleted")

    testWorkflow.delete()

    self.assertFalse(testWU1.exists(), "WorkUnit exists after workflow is deleted")
    self.assertFalse(testWU2.exists(), "WorkUnit exists after workflow is deleted")

    return
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create and then delete a job.  Use the job class's exists() method to
    determine if the job has been written to the database before it is
    created, after it has been created and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])
    # testWU = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[44]))

    self.assertFalse(testJob.exists(), "Job exists before it was created")
    # self.assertFalse(testWU.exists(), "WorkUnit exists before it was created")

    testJob.create(group=testJobGroup)
    self.assertTrue(testJob.exists(), "Job does not exist after it was created")
    # self.assertFalse(testWU.exists(), "WorkUnit exists when there is no work")

    # Test the getWorkflow method
    workflow = testJob.getWorkflow()
    self.assertEqual(workflow['task'], 'Test')
    self.assertEqual(workflow['name'], 'wf001')

    testJob.delete()
    self.assertFalse(testJob.exists(), "Job exists after it was deleted")
    # self.assertFalse(testWU.exists(), "WorkUnit exists after job is deleted")

    return
def createDummyJobs(self, nJobs, location=None):
    """
    _createDummyJobs_

    Create some dummy jobs
    """
    if not location:
        location = self.sites[0]

    nameStr = makeUUID()

    testWorkflow = Workflow(spec=nameStr, owner="tapas",
                            name=nameStr,
                            task="basicWorkload/Production",
                            owner_vogroup="phgroup",
                            owner_vorole="cmsrole")
    testWorkflow.create()

    testFileset = Fileset(name=nameStr)
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    jobList = []
    for i in range(nJobs):
        testJob = Job(name="%s-%i" % (nameStr, i))
        testJob["location"] = location
        testJob["custom"]["location"] = location
        testJob["userdn"] = "tapas"
        testJob["owner"] = "tapas"
        testJob["userrole"] = "cmsrole"
        testJob["usergroup"] = "phgroup"
        testJob.create(testJobGroup)
        jobList.append(testJob)

    return jobList
def createTestJob(self, testSubscription, jobName, *testFiles):
    """
    _createTestJob_

    Create a test job with the given files as input.  This will also
    create the appropriate jobgroup for the supplied subscription.
    """
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFiles = list(testFiles)

    testJob = Job(name=jobName, files=testFiles)
    testJob["couch_record"] = "somecouchrecord"
    testJob["location"] = "test.site.ch"
    testJob.create(group=testJobGroup)

    # Hand the job back so callers can make assertions on it.
    return testJob
def testAddChecksumsByLFN(self):
    """
    _testAddChecksumsByLFN_

    Tests for adding checksums by DAO by LFN
    """
    testWorkflow = Workflow(spec="hello", owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.AddChecksumByLFN")
    binds = [{"lfn": testFileA["lfn"], "cktype": "cksum", "cksum": 101},
             {"lfn": testFileA["lfn"], "cktype": "adler32", "cksum": 201},
             {"lfn": testFileB["lfn"], "cktype": "cksum", "cksum": 101}]
    parentAction.execute(bulkList=binds)

    testFileC = File(id=testFileA["id"])
    testFileC.loadData()
    testFileD = File(id=testFileB["id"])
    testFileD.loadData()

    self.assertEqual(testFileC["checksums"], {"adler32": "201", "cksum": "101"})
    self.assertEqual(testFileD["checksums"], {"cksum": "101"})

    return
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create and then delete a job.  Use the job class's exists() method to
    determine if the job has been written to the database before it is
    created, after it has been created and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id or False; ">= 0" would also pass for
    # False, so assert truthiness instead.
    assert testJob.exists(), \
        "ERROR: Job does not exist after it was created"

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    return
def createDummyJobs(self, nJobs, location=None):
    """
    _createDummyJobs_

    Create some dummy jobs
    """
    if not location:
        location = self.sites[0]

    nameStr = makeUUID()

    testWorkflow = Workflow(spec=nameStr, owner="mnorman",
                            name=nameStr,
                            task="basicWorkload/Production",
                            owner_vogroup='phgroup',
                            owner_vorole='cmsrole')
    testWorkflow.create()

    testFileset = Fileset(name=nameStr)
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    jobList = []
    for i in range(nJobs):
        testJob = Job(name='%s-%i' % (nameStr, i))
        testJob['location'] = location
        testJob['custom']['location'] = location
        testJob['userdn'] = 'mnorman'
        testJob['owner'] = 'mnorman'
        testJob['userrole'] = 'cmsrole'
        testJob['usergroup'] = 'phgroup'
        testJob.create(testJobGroup)
        jobList.append(testJob)

    return jobList
def testLoadOutputID(self):
    """
    _testLoadOutputID_

    Test whether we can load an output ID for a job
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job()
    testJob.create(group=testJobGroup)

    self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id)

    return
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Test the ListRunningJobs DAO.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Steve",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJobA = Job(name=makeUUID(), files=[])
    testJobA["couch_record"] = makeUUID()
    testJobA.create(group=testJobGroup)
    testJobA["state"] = "executing"

    testJobB = Job(name=makeUUID(), files=[])
    testJobB["couch_record"] = makeUUID()
    testJobB.create(group=testJobGroup)
    testJobB["state"] = "complete"

    testJobC = Job(name=makeUUID(), files=[])
    testJobC["couch_record"] = makeUUID()
    testJobC.create(group=testJobGroup)
    testJobC["state"] = "new"

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC])

    runningJobsAction = self.daoFactory(classname="Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()

    assert len(runningJobs) == 2, \
        "Error: Wrong number of running jobs returned."

    for runningJob in runningJobs:
        if runningJob["job_name"] == testJobA["name"]:
            assert runningJob["state"] == testJobA["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobA["couch_record"], \
                "Error: Running job has wrong couch record."
        else:
            assert runningJob["job_name"] == testJobC["name"], \
                "Error: Running job has wrong name."
            assert runningJob["state"] == testJobC["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobC["couch_record"], \
                "Error: Running job has wrong couch record."

    return
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Create a job and save it to the database.  Roll back the database
    transaction and verify that the job is no longer in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id or False; ">= 0" would also pass for
    # False, so assert truthiness instead.
    assert testJob.exists(), \
        "ERROR: Job does not exist after it was created"

    myThread.transaction.rollback()

    assert testJob.exists() is False, \
        "ERROR: Job exists after transaction was rolled back."

    return
def createJobs(self, nJobs):
    """
    Creates a series of jobGroups for submissions
    """
    testWorkflow = Workflow(spec="dummy", owner="mnorman",
                            name="dummy", task="basicWorkload/Production")
    testWorkflow.create()

    # Create Fileset, Subscription, jobGroup
    testFileset = Fileset(name="dummy")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Create jobs ("jobID" rather than shadowing the builtin "id")
    for jobID in range(nJobs):
        testJob = Job(name='Job_%i' % (jobID))
        testJob['owner'] = "mnorman"
        testJob['location'] = 'Xanadu'
        testJob.create(testJobGroup)
        testJobGroup.add(testJob)

    testFileset.commit()
    testJobGroup.commit()

    return testJobGroup
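# Usage sketch (hypothetical, not part of the original suite): driving the
# createJobs() helper above and checking only fields the helper itself sets.
def testCreateJobsHelper(self):
    """createJobs() should return a group holding nJobs new jobs."""
    testJobGroup = self.createJobs(nJobs=5)
    self.assertEqual(len(testJobGroup.jobs), 5)
    for job in testJobGroup.jobs:
        self.assertEqual(job['location'], 'Xanadu')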
def createJobs(self):
    """
    _createJobs_

    Create test jobs in WMBS and BossAir
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="tapas",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFilesetA = Fileset(name="TestFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name="TestFilesetB")
    testFilesetB.create()
    testFilesetC = Fileset(name="TestFilesetC")
    testFilesetC.create()

    testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    testFileA.create()
    testFilesetA.addFile(testFileA)
    testFilesetA.commit()
    testFilesetB.addFile(testFileA)
    testFilesetB.commit()
    testFilesetC.addFile(testFileA)
    testFilesetC.commit()

    testSubscriptionA = Subscription(fileset=testFilesetA,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionA.create()
    testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1",
                                          "valid": True}])
    testSubscriptionB = Subscription(fileset=testFilesetB,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionB.create()
    testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1",
                                          "valid": False}])
    testSubscriptionC = Subscription(fileset=testFilesetC,
                                     workflow=testWorkflow,
                                     type="Merge")
    testSubscriptionC.create()

    testJobGroupA = JobGroup(subscription=testSubscriptionA)
    testJobGroupA.create()
    testJobGroupB = JobGroup(subscription=testSubscriptionB)
    testJobGroupB.create()
    testJobGroupC = JobGroup(subscription=testSubscriptionC)
    testJobGroupC.create()

    # Site1, Has been assigned a location and is complete.
    testJobA = Job(name="testJobA", files=[testFileA])
    testJobA["couch_record"] = makeUUID()
    testJobA.create(group=testJobGroupA)
    testJobA["state"] = "success"

    # Site 1, Has been assigned a location and is incomplete.
    testJobB = Job(name="testJobB", files=[testFileA])
    testJobB["couch_record"] = makeUUID()
    testJobB["cache_dir"] = self.tempDir
    testJobB.create(group=testJobGroupA)
    testJobB["state"] = "executing"
    runJobB = RunJob()
    runJobB.buildFromJob(testJobB)
    runJobB["status"] = "PEND"

    # Does not have a location, white listed to site 1
    testJobC = Job(name="testJobC", files=[testFileA])
    testJobC["couch_record"] = makeUUID()
    testJobC.create(group=testJobGroupA)
    testJobC["state"] = "new"

    # Site 2, Has been assigned a location and is complete.
    testJobD = Job(name="testJobD", files=[testFileA])
    testJobD["couch_record"] = makeUUID()
    testJobD.create(group=testJobGroupB)
    testJobD["state"] = "success"

    # Site 2, Has been assigned a location and is incomplete.
    testJobE = Job(name="testJobE", files=[testFileA])
    testJobE["couch_record"] = makeUUID()
    testJobE.create(group=testJobGroupB)
    testJobE["state"] = "executing"
    runJobE = RunJob()
    runJobE.buildFromJob(testJobE)
    runJobE["status"] = "RUN"

    # Does not have a location, site 1 is blacklisted.
    testJobF = Job(name="testJobF", files=[testFileA])
    testJobF["couch_record"] = makeUUID()
    testJobF.create(group=testJobGroupB)
    testJobF["state"] = "new"

    # Site 3, Has been assigned a location and is complete.
    testJobG = Job(name="testJobG", files=[testFileA])
    testJobG["couch_record"] = makeUUID()
    testJobG.create(group=testJobGroupC)
    testJobG["state"] = "cleanout"

    # Site 3, Has been assigned a location and is incomplete.
    testJobH = Job(name="testJobH", files=[testFileA])
    testJobH["couch_record"] = makeUUID()
    testJobH.create(group=testJobGroupC)
    testJobH["state"] = "new"

    # Site 3, Does not have a location.
    testJobI = Job(name="testJobI", files=[testFileA])
    testJobI["couch_record"] = makeUUID()
    testJobI.create(group=testJobGroupC)
    testJobI["state"] = "new"

    # Site 3, Does not have a location and is in cleanout.
    testJobJ = Job(name="testJobJ", files=[testFileA])
    testJobJ["couch_record"] = makeUUID()
    testJobJ.create(group=testJobGroupC)
    testJobJ["state"] = "cleanout"

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC, testJobD,
                                    testJobE, testJobF, testJobG, testJobH,
                                    testJobI, testJobJ])

    self.insertRunJob.execute([runJobB, runJobE])

    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    setLocationAction.execute(testJobA["id"], "testSite1")
    setLocationAction.execute(testJobB["id"], "testSite1")
    setLocationAction.execute(testJobD["id"], "testSite1")
    setLocationAction.execute(testJobE["id"], "testSite2")
    setLocationAction.execute(testJobG["id"], "testSite1")
    setLocationAction.execute(testJobH["id"], "testSite1")

    return
def createTestJobGroup(self, nJobs=10, retry_count=0, workloadPath='test'):
    """
    Creates a group of several jobs
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFile0 = File(lfn="/this/is/a/parent", size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('malpaquet')

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    testFileB.setLocation('malpaquet')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn="/this/is/a/parent")
    testFileB.addParent(lfn="/this/is/a/parent")

    for i in range(0, nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['group'] = 'BadGuys'
        testJob['user'] = '******'
        testJob['taskType'] = 'Merge'
        # testJob['fwjr'] = myReport
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
def injectJobs(self):
    """
    _injectJobs_

    Inject two workflows into WMBS and save the job objects to disk.
    """
    testWorkflowA = Workflow(spec="specA.pkl", owner="Steve",
                             name="wf001", task="TestTaskA")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec="specB.pkl", owner="Steve",
                             name="wf002", task="TestTaskB")
    testWorkflowB.create()

    testFileset = Fileset("testFileset")
    testFileset.create()

    testSubA = Subscription(fileset=testFileset, workflow=testWorkflowA)
    testSubA.create()
    testSubB = Subscription(fileset=testFileset, workflow=testWorkflowB)
    testSubB.create()

    testGroupA = JobGroup(subscription=testSubA)
    testGroupA.create()
    testGroupB = JobGroup(subscription=testSubB)
    testGroupB.create()

    stateChanger = ChangeState(self.createConfig(), "jobsubmittercaching_t")

    for i in range(10):
        newFile = File(lfn="testFile%s" % i,
                       locations=set(["se.T1_US_FNAL", "se.T1_UK_RAL"]))
        newFile.create()

        newJobA = Job(name="testJobA-%s" % i, files=[newFile])
        newJobA["workflow"] = "wf001"
        newJobA["possiblePSN"] = ["T1_US_FNAL"]
        newJobA["sandbox"] = "%s/somesandbox" % self.testDir
        newJobA["owner"] = "Steve"

        jobCacheDir = os.path.join(self.testDir, "jobA-%s" % i)
        os.mkdir(jobCacheDir)
        newJobA["cache_dir"] = jobCacheDir
        newJobA["type"] = "Processing"
        newJobA['requestType'] = 'ReReco'
        newJobA.create(testGroupA)

        with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
            pickle.dump(newJobA, jobHandle)

        stateChanger.propagate([newJobA], "created", "new")

        newJobB = Job(name="testJobB-%s" % i, files=[newFile])
        # These jobs belong to testWorkflowB (wf002); the original set
        # "wf001" here, which looks like a copy-paste slip.
        newJobB["workflow"] = "wf002"
        newJobB["possiblePSN"] = ["T1_UK_RAL"]
        newJobB["sandbox"] = "%s/somesandbox" % self.testDir
        newJobB["owner"] = "Steve"

        jobCacheDir = os.path.join(self.testDir, "jobB-%s" % i)
        os.mkdir(jobCacheDir)
        newJobB["cache_dir"] = jobCacheDir
        newJobB["type"] = "Processing"
        newJobB['requestType'] = 'ReReco'
        newJobB.create(testGroupB)

        with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
            pickle.dump(newJobB, jobHandle)

        stateChanger.propagate([newJobB], "created", "new")

    return
def testParallelProcessing(self):
    """
    _testParallelProcessing_

    Verify that merging works correctly when multiple processing
    subscriptions are run over the same input files.  The merging
    algorithm should ignore processing jobs that feed into different
    merge subscriptions.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
    locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

    mergeFilesetA = Fileset(name="mergeFilesetA")
    mergeFilesetB = Fileset(name="mergeFilesetB")
    mergeFilesetA.create()
    mergeFilesetB.create()

    mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
    mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
    mergeMergedFilesetA.create()
    mergeMergedFilesetB.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                             owner="Steve", task="Test")
    mergeWorkflow.create()

    mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionA.create()
    mergeSubscriptionB.create()

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputFileA = File(lfn="inputLFNA")
    inputFileB = File(lfn="inputLFNB")
    inputFileA.create()
    inputFileB.create()

    procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                             owner="Steve", task="Test")
    procWorkflowA.create()
    procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
    procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                             owner="Steve", task="Test2")
    procWorkflowB.create()
    procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

    procSubscriptionA = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowA,
                                     split_algo="EventBased")
    procSubscriptionA.create()
    procSubscriptionB = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowB,
                                     split_algo="EventBased")
    procSubscriptionB.create()

    jobGroupA = JobGroup(subscription=procSubscriptionA)
    jobGroupA.create()
    jobGroupB = JobGroup(subscription=procSubscriptionB)
    jobGroupB.create()

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    testJobA = Job()
    testJobA.addFile(inputFileA)
    testJobA.create(jobGroupA)
    testJobA["state"] = "cleanout"
    testJobA["oldstate"] = "new"
    testJobA["couch_record"] = "somejive"
    testJobA["retry_count"] = 0
    testJobA["outcome"] = "success"
    testJobA.save()

    testJobB = Job()
    testJobB.addFile(inputFileB)
    testJobB.create(jobGroupA)
    testJobB["state"] = "cleanout"
    testJobB["oldstate"] = "new"
    testJobB["couch_record"] = "somejive"
    testJobB["retry_count"] = 0
    testJobB["outcome"] = "success"
    testJobB.save()

    testJobC = Job()
    testJobC.addFile(inputFileA)
    testJobC.create(jobGroupB)
    testJobC["state"] = "cleanout"
    testJobC["oldstate"] = "new"
    testJobC["couch_record"] = "somejive"
    testJobC["retry_count"] = 0
    testJobC["outcome"] = "success"
    testJobC.save()

    testJobD = Job()
    testJobD.addFile(inputFileA)
    testJobD.create(jobGroupB)
    testJobD["state"] = "cleanout"
    testJobD["oldstate"] = "new"
    testJobD["couch_record"] = "somejive"
    testJobD["retry_count"] = 0
    testJobD["outcome"] = "failure"
    testJobD.save()

    testJobE = Job()
    testJobE.addFile(inputFileB)
    testJobE.create(jobGroupB)
    testJobE["state"] = "cleanout"
    testJobE["oldstate"] = "new"
    testJobE["couch_record"] = "somejive"
    testJobE["retry_count"] = 0
    testJobE["outcome"] = "success"
    testJobE.save()

    testJobF = Job()
    testJobF.addFile(inputFileB)
    testJobF.create(jobGroupB)
    testJobF["state"] = "cleanout"
    testJobF["oldstate"] = "new"
    testJobF["couch_record"] = "somejive"
    testJobF["retry_count"] = 0
    testJobF["outcome"] = "failure"
    testJobF.save()

    changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                            testJobE, testJobF])

    fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileA.addParent(inputFileA["lfn"])
    fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileB.addRun(Run(1, *[45]))
    fileB.create()
    fileB.addParent(inputFileB["lfn"])

    jobGroupA.output.addFile(fileA)
    jobGroupA.output.addFile(fileB)
    jobGroupA.output.commit()

    mergeFilesetA.addFile(fileA)
    mergeFilesetA.addFile(fileB)
    mergeFilesetA.commit()

    fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileC.addRun(Run(1, *[45]))
    fileC.create()
    fileC.addParent(inputFileA["lfn"])
    fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileD.addRun(Run(1, *[45]))
    fileD.create()
    fileD.addParent(inputFileB["lfn"])

    jobGroupB.output.addFile(fileC)
    jobGroupB.output.addFile(fileD)

    mergeFilesetB.addFile(fileC)
    mergeFilesetB.addFile(fileD)
    mergeFilesetB.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=mergeSubscriptionB)
    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 0, \
        "Error: No merge jobs should have been created."

    fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileE.addRun(Run(1, *[45]))
    fileE.create()
    fileE.addParent(inputFileA["lfn"])
    fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                 locations={"T2_CH_CERN"})
    fileF.addRun(Run(1, *[45]))
    fileF.create()
    fileF.addParent(inputFileB["lfn"])

    jobGroupB.output.addFile(fileE)
    jobGroupB.output.addFile(fileF)

    mergeFilesetB.addFile(fileE)
    mergeFilesetB.addFile(fileF)
    mergeFilesetB.commit()

    testJobD["outcome"] = "success"
    testJobD.save()
    testJobF["outcome"] = "success"
    testJobF.save()

    changeStateDAO.execute([testJobD, testJobF])

    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 1, \
        "Error: One merge job should have been created: %s" % len(result)

    return
def testFailJobInput(self):
    """
    _testFailJobInput_

    Test the Jobs.FailInput DAO and verify that it doesn't affect other
    jobs/subscriptions that run over the same files.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve",
                             name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileA.create()
    testFileB.create()
    testFileC.create()

    testFileset.addFile([testFileA, testFileB, testFileC])
    bogusFileset.addFile([testFileA, testFileB, testFileC])
    testFileset.commit()
    bogusFileset.commit()

    testSubscription.completeFiles([testFileA, testFileB, testFileC])
    bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    testJobA = Job(name="TestJobA",
                   files=[testFileA, testFileB, testFileC])
    testJobB = Job(name="TestJobB",
                   files=[testFileA, testFileB, testFileC])
    bogusJob = Job(name="BogusJob",
                   files=[testFileA, testFileB, testFileC])
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA.failInputFiles()
    testJobB.failInputFiles()

    self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
    self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    testJobB["state"] = "cleanout"
    changeStateAction.execute([testJobB])

    # Try again
    testJobA.failInputFiles()

    # Should now be failed
    self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
    self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
    self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

    # bogus should be unchanged
    self.assertEqual(len(bogusSubscription.filesOfStatus("Available")), 0)
    self.assertEqual(len(bogusSubscription.filesOfStatus("Acquired")), 3)
    self.assertEqual(len(bogusSubscription.filesOfStatus("Failed")), 0)
    self.assertEqual(len(bogusSubscription.filesOfStatus("Completed")), 0)

    return
def testGetOutputMapDAO(self): """ _testGetOutputMapDAO_ Verify the proper behavior of the GetOutputMapDAO for a variety of different processing chains. """ recoOutputFileset = Fileset(name="RECO") recoOutputFileset.create() mergedRecoOutputFileset = Fileset(name="MergedRECO") mergedRecoOutputFileset.create() alcaOutputFileset = Fileset(name="ALCA") alcaOutputFileset.create() mergedAlcaOutputFileset = Fileset(name="MergedALCA") mergedAlcaOutputFileset.create() dqmOutputFileset = Fileset(name="DQM") dqmOutputFileset.create() mergedDqmOutputFileset = Fileset(name="MergedDQM") mergedDqmOutputFileset.create() cleanupFileset = Fileset(name="Cleanup") cleanupFileset.create() testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="None") testWorkflow.create() testWorkflow.addOutput("output", recoOutputFileset, mergedRecoOutputFileset) testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset, mergedAlcaOutputFileset) testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset) testWorkflow.addOutput("output", cleanupFileset) testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset) testWorkflow.addOutput("DQM", cleanupFileset) testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve", name="TestRecoMergeWF", task="None") testRecoMergeWorkflow.create() testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset, mergedRecoOutputFileset) testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve", name="TestRecoProcWF", task="None") testRecoProcWorkflow.create() testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve", name="TestAlcaChildWF", task="None") testAlcaChildWorkflow.create() inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000, locations="cmssrm.fnal.gov") inputFile.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(inputFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") testMergeRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoMergeWorkflow, split_algo="WMBSMergeBySize", type="Merge") testProcRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoProcWorkflow, split_algo="FileBased", type="Processing") testChildAlcaSubscription = Subscription(fileset=alcaOutputFileset, workflow=testAlcaChildWorkflow, split_algo="FileBased", type="Processing") testSubscription.create() testMergeRecoSubscription.create() testProcRecoSubscription.create() testChildAlcaSubscription.create() testSubscription.acquireFiles() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="SplitJobA", files=[inputFile]) testJob.create(group=testJobGroup) testJob["state"] = "complete" testJob.save() outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap") outputMap = outputMapAction.execute(jobID=testJob["id"]) assert len(outputMap.keys()) == 3, \ "Error: Wrong number of outputs for primary workflow." 
goldenMap = {"output": (recoOutputFileset.id, mergedRecoOutputFileset.id), "ALCARECOStreamCombined": (alcaOutputFileset.id, mergedAlcaOutputFileset.id), "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id)} for outputID in outputMap.keys(): for outputFilesets in outputMap[outputID]: if outputFilesets["merged_output_fileset"] is None: self.assertEqual(outputFilesets["output_fileset"], cleanupFileset.id, "Error: Cleanup fileset is wrong.") continue self.assertTrue(outputID in goldenMap.keys(), "Error: Output identifier is missing.") self.assertEqual(outputFilesets["output_fileset"], goldenMap[outputID][0], "Error: Output fileset is wrong.") self.assertEqual(outputFilesets["merged_output_fileset"], goldenMap[outputID][1], "Error: Merged output fileset is wrong.") del goldenMap[outputID] self.assertEqual(len(goldenMap.keys()), 0, "Error: Missing output maps.") return
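# A sketch of how the structure returned by Jobs.GetOutputMap can be walked,
# inferred from the verification loop above: a dict keyed by output module
# name whose values are lists of dicts carrying "output_fileset" and
# "merged_output_fileset" ids, with None marking cleanup filesets. The helper
# name mergedOutputsByModule is hypothetical.
def mergedOutputsByModule(outputMap):
    """Return {module name: [merged fileset ids]}, skipping cleanup entries."""
    merged = {}
    for moduleName, filesetInfos in outputMap.items():
        for info in filesetInfos:
            if info["merged_output_fileset"] is None:
                continue  # cleanup filesets carry no merged counterpart
            merged.setdefault(moduleName, []).append(info["merged_output_fileset"])
    return merged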
def testList(self): """ _testList_ Test the functions that list thresholds for creating jobs and submitting jobs. """ myResourceControl = ResourceControl() myResourceControl.insertSite("testSite1", 10, 20, "testSE1", "testCE1", "T1_US_FNAL", "LsfPlugin") myResourceControl.insertSite("testSite2", 20, 40, "testSE2", "testCE2") myResourceControl.insertThreshold("testSite1", "Processing", 20, 10) myResourceControl.insertThreshold("testSite1", "Merge", 200, 100) myResourceControl.insertThreshold("testSite2", "Processing", 50, 25) myResourceControl.insertThreshold("testSite2", "Merge", 135, 65) testWorkflow = Workflow(spec=makeUUID(), owner="Steve", name=makeUUID(), task="Test") testWorkflow.create() testFilesetA = Fileset(name="TestFilesetA") testFilesetA.create() testFilesetB = Fileset(name="TestFilesetB") testFilesetB.create() testFilesetC = Fileset(name="TestFilesetC") testFilesetC.create() testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"])) testFileA.create() testFilesetA.addFile(testFileA) testFilesetA.commit() testFilesetB.addFile(testFileA) testFilesetB.commit() testFilesetC.addFile(testFileA) testFilesetC.commit() testSubscriptionA = Subscription(fileset=testFilesetA, workflow=testWorkflow, type="Processing") testSubscriptionA.create() testSubscriptionA.addWhiteBlackList([{ "site_name": "testSite1", "valid": True }]) testSubscriptionB = Subscription(fileset=testFilesetB, workflow=testWorkflow, type="Processing") testSubscriptionB.create() testSubscriptionB.addWhiteBlackList([{ "site_name": "testSite1", "valid": False }]) testSubscriptionC = Subscription(fileset=testFilesetC, workflow=testWorkflow, type="Merge") testSubscriptionC.create() testJobGroupA = JobGroup(subscription=testSubscriptionA) testJobGroupA.create() testJobGroupB = JobGroup(subscription=testSubscriptionB) testJobGroupB.create() testJobGroupC = JobGroup(subscription=testSubscriptionC) testJobGroupC.create() # Site1, Has been assigned a location and is complete. testJobA = Job(name="testJobA", files=[testFileA]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroupA) testJobA["state"] = "success" # Site 1, Has been assigned a location and is incomplete. testJobB = Job(name="testJobB", files=[testFileA]) testJobB["couch_record"] = makeUUID() testJobB.create(group=testJobGroupA) testJobB["state"] = "executing" runJobB = RunJob() runJobB.buildFromJob(testJobB) runJobB["status"] = "PEND" # Does not have a location, white listed to site 1 testJobC = Job(name="testJobC", files=[testFileA]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroupA) testJobC["state"] = "new" # Site 2, Has been assigned a location and is complete. testJobD = Job(name="testJobD", files=[testFileA]) testJobD["couch_record"] = makeUUID() testJobD.create(group=testJobGroupB) testJobD["state"] = "success" # Site 2, Has been assigned a location and is incomplete. testJobE = Job(name="testJobE", files=[testFileA]) testJobE["couch_record"] = makeUUID() testJobE.create(group=testJobGroupB) testJobE["state"] = "executing" runJobE = RunJob() runJobE.buildFromJob(testJobE) runJobE["status"] = "RUN" # Does not have a location, site 1 is blacklisted. testJobF = Job(name="testJobF", files=[testFileA]) testJobF["couch_record"] = makeUUID() testJobF.create(group=testJobGroupB) testJobF["state"] = "new" # Site 3, Has been assigned a location and is complete. 
testJobG = Job(name="testJobG", files=[testFileA]) testJobG["couch_record"] = makeUUID() testJobG.create(group=testJobGroupC) testJobG["state"] = "cleanout" # Site 3, Has been assigned a location and is incomplete. testJobH = Job(name="testJobH", files=[testFileA]) testJobH["couch_record"] = makeUUID() testJobH.create(group=testJobGroupC) testJobH["state"] = "new" # Site 3, Does not have a location. testJobI = Job(name="testJobI", files=[testFileA]) testJobI["couch_record"] = makeUUID() testJobI.create(group=testJobGroupC) testJobI["state"] = "new" # Site 3, Does not have a location and is in cleanout. testJobJ = Job(name="testJobJ", files=[testFileA]) testJobJ["couch_record"] = makeUUID() testJobJ.create(group=testJobGroupC) testJobJ["state"] = "cleanout" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[ testJobA, testJobB, testJobC, testJobD, testJobE, testJobF, testJobG, testJobH, testJobI, testJobJ ]) self.insertRunJob.execute([runJobB, runJobE]) setLocationAction = self.daoFactory(classname="Jobs.SetLocation") setLocationAction.execute(testJobA["id"], "testSite1") setLocationAction.execute(testJobB["id"], "testSite1") setLocationAction.execute(testJobD["id"], "testSite1") setLocationAction.execute(testJobE["id"], "testSite1") setLocationAction.execute(testJobG["id"], "testSite1") setLocationAction.execute(testJobH["id"], "testSite1") createThresholds = myResourceControl.listThresholdsForCreate() submitThresholds = myResourceControl.listThresholdsForSubmit() self.assertEqual(len(createThresholds.keys()), 2, "Error: Wrong number of sites in create thresholds") self.assertEqual(createThresholds["testSite1"]["total_slots"], 10, "Error: Wrong number of slots for site 1") self.assertEqual(createThresholds["testSite2"]["total_slots"], 20, "Error: Wrong number of slots for site 2") # We should have two running jobs with locations at site one, # two running jobs without locations at site two, and one running # job without a location at site one and two. self.assertEqual(createThresholds["testSite1"]["pending_jobs"], 4, "Error: Wrong number of pending jobs for site 1") # We should have one running job with a location at site 2 and # another running job without a location. 
self.assertEqual(createThresholds["testSite2"]["pending_jobs"], 2, "Error: Wrong number of pending jobs for site 2") # We should also have a cms_name self.assertEqual(createThresholds["testSite1"]["cms_name"], "T1_US_FNAL") self.assertEqual(createThresholds["testSite2"]["cms_name"], None) mergeThreshold1 = None mergeThreshold2 = None procThreshold1 = None procThreshold2 = None self.assertEqual(submitThresholds["testSite1"]['cms_name'], 'T1_US_FNAL') for threshold in submitThresholds["testSite1"]["thresholds"]: if threshold['task_type'] == "Merge": mergeThreshold1 = threshold elif threshold['task_type'] == "Processing": procThreshold1 = threshold self.assertEqual(submitThresholds["testSite2"]['cms_name'], None) for threshold in submitThresholds["testSite2"]["thresholds"]: if threshold['task_type'] == "Merge": mergeThreshold2 = threshold elif threshold['task_type'] == "Processing": procThreshold2 = threshold self.assertEqual( submitThresholds["testSite1"]["total_running_jobs"], 1, "Error: Wrong number of running jobs for submit thresholds.") self.assertEqual( submitThresholds["testSite2"]["total_running_jobs"], 0, "Error: Wrong number of running jobs for submit thresholds.") self.assertEqual( submitThresholds["testSite1"]["total_pending_jobs"], 1, "Error: Wrong number of pending jobs for submit thresholds.") self.assertEqual( submitThresholds["testSite2"]["total_pending_jobs"], 0, "Error: Wrong number of pending jobs for submit thresholds.") self.assertEqual( mergeThreshold1["task_running_jobs"], 0, "Error: Wrong number of task running jobs for submit thresholds.") self.assertEqual( mergeThreshold1["task_pending_jobs"], 0, "Error: Wrong number of task pending jobs for submit thresholds.") self.assertEqual( procThreshold1["task_running_jobs"], 1, "Error: Wrong number of task running jobs for submit thresholds.") self.assertEqual( procThreshold1["task_pending_jobs"], 1, "Error: Wrong number of task pending jobs for submit thresholds.") self.assertEqual( mergeThreshold2["task_running_jobs"], 0, "Error: Wrong number of task running jobs for submit thresholds.") self.assertEqual( mergeThreshold2["task_pending_jobs"], 0, "Error: Wrong number of task pending jobs for submit thresholds.") self.assertEqual( procThreshold2["task_running_jobs"], 0, "Error: Wrong number of task running jobs for submit thresholds.") self.assertEqual( procThreshold2["task_pending_jobs"], 0, "Error: Wrong number of task pending jobs for submit thresholds.") return
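# The per-site if/elif scan over threshold['task_type'] above is repeated
# verbatim for each site. A small sketch of the lookup, assuming only the
# submitThresholds layout the assertions demonstrate, i.e.
# {site: {"thresholds": [{"task_type": ...}, ...]}}; findTaskThreshold is a
# hypothetical helper, not a ResourceControl method.
def findTaskThreshold(submitThresholds, siteName, taskType):
    """Return the first threshold dict for taskType at siteName, else None."""
    for threshold in submitThresholds[siteName]["thresholds"]:
        if threshold["task_type"] == taskType:
            return threshold
    return None

# mergeThreshold1 = findTaskThreshold(submitThresholds, "testSite1", "Merge")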
def stuffWMBS(self, injected = True): """ _stuffWMBS_ Insert some dummy jobs, jobgroups, filesets, files and subscriptions into WMBS to test job creation. Three completed job groups each containing several files are injected. Another incomplete job group is also injected. Also files are added to the "Mergeable" subscription as well as to the output fileset for their jobgroups. """ locationAction = self.daoFactory(classname = "Locations.New") locationAction.execute(siteName = "s1", seName = "somese.cern.ch") changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState") self.mergeFileset = Fileset(name = "mergeFileset") self.mergeFileset.create() self.bogusFileset = Fileset(name = "bogusFileset") self.bogusFileset.create() self.mergeMergedFileset = Fileset(name = "mergeMergedFileset") self.mergeMergedFileset.create() self.bogusMergedFileset = Fileset(name = "bogusMergedFileset") self.bogusMergedFileset.create() mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bunk2", owner = "Steve", task="Test") mergeWorkflow.create() markWorkflow = self.daoFactory(classname = "Workflow.MarkInjectedWorkflows") markWorkflow.execute(names = [mergeWorkflow.name], injected = injected) self.mergeSubscription = Subscription(fileset = self.mergeFileset, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize") self.mergeSubscription.create() self.bogusSubscription = Subscription(fileset = self.bogusFileset, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize") inputFileset = Fileset(name = "inputFileset") inputFileset.create() inputWorkflow = Workflow(name = "inputWorkflow", spec = "input", owner = "Steve", task = "Test") inputWorkflow.create() inputWorkflow.addOutput("output", self.mergeFileset, self.mergeMergedFileset) inputWorkflow.addOutput("output2", self.bogusFileset, self.bogusMergedFileset) bogusInputWorkflow = Workflow(name = "bogusInputWorkflow", spec = "input", owner = "Steve", task = "Test") bogusInputWorkflow.create() inputSubscription = Subscription(fileset = inputFileset, workflow = inputWorkflow) inputSubscription.create() bogusInputSubscription = Subscription(fileset = inputFileset, workflow = bogusInputWorkflow) bogusInputSubscription.create() parentFile1 = File(lfn = "parentFile1") parentFile1.create() parentFile2 = File(lfn = "parentFile2") parentFile2.create() parentFile3 = File(lfn = "parentFile3") parentFile3.create() parentFile4 = File(lfn = "parentFile4") parentFile4.create() self.parentFileSite2 = File(lfn = "parentFileSite2") self.parentFileSite2.create() jobGroup1 = JobGroup(subscription = inputSubscription) jobGroup1.create() jobGroup2 = JobGroup(subscription = inputSubscription) jobGroup2.create() jobGroup3 = JobGroup(subscription = bogusInputSubscription) jobGroup3.create() testJob1 = Job() testJob1.addFile(parentFile1) testJob1.create(jobGroup1) testJob1["state"] = "cleanout" testJob1["oldstate"] = "new" testJob1["couch_record"] = "somejive" testJob1["retry_count"] = 0 testJob1["outcome"] = "success" testJob1.save() changeStateDAO.execute([testJob1]) testJob1A = Job() testJob1A.addFile(parentFile1) testJob1A.create(jobGroup3) testJob1A["state"] = "cleanout" testJob1A["oldstate"] = "new" testJob1A["couch_record"] = "somejive" testJob1A["retry_count"] = 0 testJob1A["outcome"] = "failure" testJob1A.save() changeStateDAO.execute([testJob1A]) testJob2 = Job() testJob2.addFile(parentFile2) testJob2.create(jobGroup1) testJob2["state"] = "cleanout" testJob2["oldstate"] = "new" testJob2["couch_record"] = "somejive" testJob2["retry_count"] = 0 testJob2["outcome"] = "success" 
testJob2.save() changeStateDAO.execute([testJob2]) testJob3 = Job() testJob3.addFile(parentFile3) testJob3.create(jobGroup2) testJob3["state"] = "cleanout" testJob3["oldstate"] = "new" testJob3["couch_record"] = "somejive" testJob3["retry_count"] = 0 testJob3["outcome"] = "success" testJob3.save() changeStateDAO.execute([testJob3]) testJob4 = Job() testJob4.addFile(parentFile4) testJob4.create(jobGroup2) testJob4["state"] = "cleanout" testJob4["oldstate"] = "new" testJob4["couch_record"] = "somejive" testJob4["retry_count"] = 0 testJob4["outcome"] = "failure" testJob4.save() changeStateDAO.execute([testJob4]) # We'll simulate a failed split by event job that the merger should # ignore. parentFile5 = File(lfn = "parentFile5") parentFile5.create() testJob5 = Job() testJob5.addFile(parentFile5) testJob5.create(jobGroup2) testJob5["state"] = "cleanout" testJob5["oldstate"] = "new" testJob5["couch_record"] = "somejive" testJob5["retry_count"] = 0 testJob5["outcome"] = "success" testJob5.save() changeStateDAO.execute([testJob5]) testJob6 = Job() testJob6.addFile(parentFile5) testJob6.create(jobGroup2) testJob6["state"] = "cleanout" testJob6["oldstate"] = "new" testJob6["couch_record"] = "somejive" testJob6["retry_count"] = 0 testJob6["outcome"] = "failure" testJob6.save() changeStateDAO.execute([testJob6]) testJob7 = Job() testJob7.addFile(self.parentFileSite2) testJob7.create(jobGroup2) testJob7["state"] = "cleanout" testJob7["oldstate"] = "new" testJob7["couch_record"] = "somejive" testJob7["retry_count"] = 0 testJob7["outcome"] = "success" testJob7.save() changeStateDAO.execute([testJob7]) badFile1 = File(lfn = "badFile1", size = 10241024, events = 10241024, first_event = 0, locations = set(["somese.cern.ch"])) badFile1.addRun(Run(1, *[45])) badFile1.create() badFile1.addParent(parentFile5["lfn"]) file1 = File(lfn = "file1", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) file1.addRun(Run(1, *[45])) file1.create() file1.addParent(parentFile1["lfn"]) file2 = File(lfn = "file2", size = 1024, events = 1024, first_event = 1024, locations = set(["somese.cern.ch"])) file2.addRun(Run(1, *[45])) file2.create() file2.addParent(parentFile1["lfn"]) file3 = File(lfn = "file3", size = 1024, events = 1024, first_event = 2048, locations = set(["somese.cern.ch"])) file3.addRun(Run(1, *[45])) file3.create() file3.addParent(parentFile1["lfn"]) file4 = File(lfn = "file4", size = 1024, events = 1024, first_event = 3072, locations = set(["somese.cern.ch"])) file4.addRun(Run(1, *[45])) file4.create() file4.addParent(parentFile1["lfn"]) fileA = File(lfn = "fileA", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileA.addRun(Run(1, *[46])) fileA.create() fileA.addParent(parentFile2["lfn"]) fileB = File(lfn = "fileB", size = 1024, events = 1024, first_event = 1024, locations = set(["somese.cern.ch"])) fileB.addRun(Run(1, *[46])) fileB.create() fileB.addParent(parentFile2["lfn"]) fileC = File(lfn = "fileC", size = 1024, events = 1024, first_event = 2048, locations = set(["somese.cern.ch"])) fileC.addRun(Run(1, *[46])) fileC.create() fileC.addParent(parentFile2["lfn"]) fileI = File(lfn = "fileI", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileI.addRun(Run(2, *[46])) fileI.create() fileI.addParent(parentFile3["lfn"]) fileII = File(lfn = "fileII", size = 1024, events = 1024, first_event = 1024, locations = set(["somese.cern.ch"])) fileII.addRun(Run(2, *[46])) fileII.create() fileII.addParent(parentFile3["lfn"]) 
fileIII = File(lfn = "fileIII", size = 1024, events = 1024, first_event = 2048, locations = set(["somese.cern.ch"])) fileIII.addRun(Run(2, *[46])) fileIII.create() fileIII.addParent(parentFile3["lfn"]) fileIV = File(lfn = "fileIV", size = 1024, events = 1024, first_event = 3072, locations = set(["somese.cern.ch"])) fileIV.addRun(Run(2, *[46])) fileIV.create() fileIV.addParent(parentFile3["lfn"]) fileX = File(lfn = "badFileA", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileX.addRun(Run(1, *[47])) fileX.create() fileX.addParent(parentFile4["lfn"]) fileY = File(lfn = "badFileB", size = 1024, events = 1024, first_event = 1024, locations = set(["somese.cern.ch"])) fileY.addRun(Run(1, *[47])) fileY.create() fileY.addParent(parentFile4["lfn"]) fileZ = File(lfn = "badFileC", size = 1024, events = 1024, first_event = 2048, locations = set(["somese.cern.ch"])) fileZ.addRun(Run(1, *[47])) fileZ.create() fileZ.addParent(parentFile4["lfn"]) jobGroup1.output.addFile(file1) jobGroup1.output.addFile(file2) jobGroup1.output.addFile(file3) jobGroup1.output.addFile(file4) jobGroup1.output.addFile(fileA) jobGroup1.output.addFile(fileB) jobGroup1.output.addFile(fileC) jobGroup1.output.commit() jobGroup2.output.addFile(fileI) jobGroup2.output.addFile(fileII) jobGroup2.output.addFile(fileIII) jobGroup2.output.addFile(fileIV) jobGroup2.output.addFile(fileX) jobGroup2.output.addFile(fileY) jobGroup2.output.addFile(fileZ) jobGroup2.output.addFile(badFile1) jobGroup2.output.commit() for file in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]: self.mergeFileset.addFile(file) self.bogusFileset.addFile(file) self.mergeFileset.commit() self.bogusFileset.commit() return
def test_AutoIncrementCheck(self): """ _AutoIncrementCheck_ Test and see whether we can find and set the auto_increment values """ myThread = threading.currentThread() if not myThread.dialect.lower() == 'mysql': return testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck") incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 1) incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 2) incrementDAO.execute(input=10) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 11) incrementDAO.execute(input=5) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 12) return
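# The dialect guard above silently returns on non-MySQL backends, so the test
# is reported as passed rather than skipped. A sketch of the same guard as a
# decorator built on unittest's skip machinery; it assumes the dialect
# attribute is set on the current thread by the test harness, exactly as the
# test itself reads it.
import functools
import threading
import unittest

def skipUnlessMySQL(testMethod):
    """Skip a WMBS test method unless the active DB dialect is MySQL."""
    @functools.wraps(testMethod)
    def wrapper(self, *args, **kwargs):
        if threading.currentThread().dialect.lower() != 'mysql':
            raise unittest.SkipTest("Jobs.AutoIncrementCheck is MySQL-only")
        return testMethod(self, *args, **kwargs)
    return wrapper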
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test', fwjrPath=None, workloadName=None, fileModifier=''): """ Creates a group of several jobs """ workloadName = workloadName or makeUUID() # evaluated per call; a makeUUID() default in the signature would be bound once at import time myThread = threading.currentThread() myThread.transaction.begin() testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops", name=workloadName, task="/TestWorkload/ReReco") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier, size=1024, events=10) testFile0.addRun(Run(10, *[12312])) testFile0.setLocation('T2_CH_CERN') testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10, first_event=88, merged=False) testFileA.addRun(Run(10, *[12312, 12313])) testFileA.setLocation('T2_CH_CERN') testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10, first_event=88, merged=False) testFileB.addRun(Run(10, *[12314, 12315, 12316])) testFileB.setLocation('T2_CH_CERN') testFile0.create() testFileA.create() testFileB.create() testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier) testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier) for i in range(0, nJobs): testJob = Job(name=makeUUID()) testJob['retry_count'] = retry_count testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run=10, lumis=[12312]) testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316]) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) testJob['fwjr_path'] = fwjrPath os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJob.create(group=testJobGroup) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob.save() testJobGroup.commit() testSubscription.acquireFiles(files=[testFileA, testFileB]) testSubscription.save() myThread.transaction.commit() return testJobGroup
def testParentageByJob(self): """ _testParentageByJob_ Tests the DAO that assigns parentage by Job """ testWorkflow = Workflow(spec='hello', owner="mnorman", name="wf001", task="basicWorkload/Production") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024, events=20, checksums={'cksum': 1}) testFileParentA.addRun(Run(1, *[45])) testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024, events=20, checksums={'cksum': 1}) testFileParentB.addRun(Run(1, *[45])) testFileParentA.create() testFileParentB.create() testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10, checksums={'cksum': 1}) testFileA.addRun(Run(1, *[45])) testFileA.create() testJobA = Job() testJobA.create(group=testJobGroup) testJobA.addFile(testFileParentA) testJobA.addFile(testFileParentB) testJobA.associateFiles() parentAction = self.daoFactory(classname="Files.SetParentageByJob") parentAction.execute(binds={ 'jobid': testJobA.exists(), 'child': testFileA['lfn'] }) testFileB = File(id=testFileA["id"]) testFileB.loadData(parentage=1) goldenFiles = [testFileParentA, testFileParentB] for parentFile in testFileB["parents"]: self.assertTrue(parentFile in goldenFiles, "ERROR: Unknown parent file") goldenFiles.remove(parentFile) self.assertEqual(len(goldenFiles), 0, "ERROR: Some parents are missing")
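# Files.SetParentageByJob is driven entirely by its binds, so extending the
# single call above to several children is a matter of building more bind
# dicts. Sketch only: whether one execute() call accepts a list of binds is
# an assumption based on how other DAOs in these tests are invoked.
def parentageBinds(jobID, childLFNs):
    """Build the bind dicts Files.SetParentageByJob expects (sketch)."""
    return [{'jobid': jobID, 'child': lfn} for lfn in childLFNs]

# parentAction.execute(binds=parentageBinds(testJobA.exists(), ["/this/is/a/lfn"]))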
class JobTestBase(unittest.TestCase): """ Base class for tests of WMBS job class (Job_t and JobWorkUnit_t) """ def __init__(self, *args, **kwargs): """ Define some class instance variables we'll fill later """ super(JobTestBase, self).__init__(*args, **kwargs) self.testFileA = None self.testFileB = None self.testWorkflow = None self.testJob = None return def setUp(self): """ _setUp_ Set up the database and logging connection. Try to create all of the WMBS tables. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationNew = self.daoFactory(classname="Locations.New") locationNew.execute(siteName="test.site.ch", pnn="T2_CH_CERN") locationNew.execute(siteName="test2.site.ch", pnn="T2_CH_CERN") return def tearDown(self): """ _tearDown_ Drop all the WMBS tables. """ self.testInit.clearDatabase() @staticmethod def createTestJob(subscriptionType="Merge"): """ _createTestJob_ Create a test job with two files as input. This will also create the appropriate workflow, jobgroup and subscription. """ testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(1, *[46])) testFileA.create() testFileB.create() testJob = Job(name=makeUUID(), files=[testFileA, testFileB]) testJob["couch_record"] = "somecouchrecord" testJob["location"] = "test.site.ch" testJob.create(group=testJobGroup) testJob.associateFiles() return testJob def createSingleJobWorkflow(self): """ Create a workflow with one job and two files and store the results in instance variables """ self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") self.testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) self.testFileA.addRun(Run(1, *[45])) self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) self.testFileB.addRun(Run(1, *[46])) self.testFileA.create() self.testFileB.create() self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB]) self.testJob.create(group=testJobGroup) self.testJob.associateFiles()
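# A minimal sketch of a concrete test case built on JobTestBase: it inherits
# the schema setUp/tearDown above and reuses the createTestJob helper. The
# class and method names here are illustrative, not part of the suite.
class JobExistsTest(JobTestBase):
    def testCreatedJobExists(self):
        testJob = self.createTestJob(subscriptionType="Processing")
        self.assertTrue(testJob.exists(), "Job should exist after creation")
        testJob.delete()
        self.assertFalse(testJob.exists(), "Job should not exist after deletion")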
def testParallelProcessing(self): """ _testParallelProcessing_ Verify that merging works correctly when multiple processing subscriptions are run over the same input files. The merging algorithm should ignore processing jobs that feed into different merge subscriptions. """ locationAction = self.daoFactory(classname = "Locations.New") locationAction.execute(siteName = "s1", seName = "somese.cern.ch") mergeFilesetA = Fileset(name = "mergeFilesetA") mergeFilesetB = Fileset(name = "mergeFilesetB") mergeFilesetA.create() mergeFilesetB.create() mergeMergedFilesetA = Fileset(name = "mergeMergedFilesetA") mergeMergedFilesetB = Fileset(name = "mergeMergedFilesetB") mergeMergedFilesetA.create() mergeMergedFilesetB.create() mergeWorkflow = Workflow(name = "mergeWorkflow", spec = "bogus", owner = "Steve", task = "Test") mergeWorkflow.create() mergeSubscriptionA = Subscription(fileset = mergeFilesetA, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize") mergeSubscriptionB = Subscription(fileset = mergeFilesetB, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize") mergeSubscriptionA.create() mergeSubscriptionB.create() inputFileset = Fileset(name = "inputFileset") inputFileset.create() inputFileA = File(lfn = "inputLFNA") inputFileB = File(lfn = "inputLFNB") inputFileA.create() inputFileB.create() procWorkflowA = Workflow(name = "procWorkflowA", spec = "bunk2", owner = "Steve", task = "Test") procWorkflowA.create() procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA) procWorkflowB = Workflow(name = "procWorkflowB", spec = "bunk3", owner = "Steve", task = "Test2") procWorkflowB.create() procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB) procSubscriptionA = Subscription(fileset = inputFileset, workflow = procWorkflowA, split_algo = "EventBased") procSubscriptionA.create() procSubscriptionB = Subscription(fileset = inputFileset, workflow = procWorkflowB, split_algo = "EventBased") procSubscriptionB.create() jobGroupA = JobGroup(subscription = procSubscriptionA) jobGroupA.create() jobGroupB = JobGroup(subscription = procSubscriptionB) jobGroupB.create() changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState") testJobA = Job() testJobA.addFile(inputFileA) testJobA.create(jobGroupA) testJobA["state"] = "cleanout" testJobA["oldstate"] = "new" testJobA["couch_record"] = "somejive" testJobA["retry_count"] = 0 testJobA["outcome"] = "success" testJobA.save() testJobB = Job() testJobB.addFile(inputFileB) testJobB.create(jobGroupA) testJobB["state"] = "cleanout" testJobB["oldstate"] = "new" testJobB["couch_record"] = "somejive" testJobB["retry_count"] = 0 testJobB["outcome"] = "success" testJobB.save() testJobC = Job() testJobC.addFile(inputFileA) testJobC.create(jobGroupB) testJobC["state"] = "cleanout" testJobC["oldstate"] = "new" testJobC["couch_record"] = "somejive" testJobC["retry_count"] = 0 testJobC["outcome"] = "success" testJobC.save() testJobD = Job() testJobD.addFile(inputFileA) testJobD.create(jobGroupB) testJobD["state"] = "cleanout" testJobD["oldstate"] = "new" testJobD["couch_record"] = "somejive" testJobD["retry_count"] = 0 testJobD["outcome"] = "failure" testJobD.save() testJobE = Job() testJobE.addFile(inputFileB) testJobE.create(jobGroupB) testJobE["state"] = "cleanout" testJobE["oldstate"] = "new" testJobE["couch_record"] = "somejive" testJobE["retry_count"] = 0 testJobE["outcome"] = "success" testJobE.save() testJobF = Job() testJobF.addFile(inputFileB) testJobF.create(jobGroupB) testJobF["state"] = "cleanout" testJobF["oldstate"] = "new" 
testJobF["couch_record"] = "somejive" testJobF["retry_count"] = 0 testJobF["outcome"] = "failure" testJobF.save() changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD, testJobE, testJobF]) fileA = File(lfn = "fileA", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileA.addRun(Run(1, *[45])) fileA.create() fileA.addParent(inputFileA["lfn"]) fileB = File(lfn = "fileB", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileB.addRun(Run(1, *[45])) fileB.create() fileB.addParent(inputFileB["lfn"]) jobGroupA.output.addFile(fileA) jobGroupA.output.addFile(fileB) jobGroupA.output.commit() mergeFilesetA.addFile(fileA) mergeFilesetA.addFile(fileB) mergeFilesetA.commit() fileC = File(lfn = "fileC", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileC.addRun(Run(1, *[45])) fileC.create() fileC.addParent(inputFileA["lfn"]) fileD = File(lfn = "fileD", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileD.addRun(Run(1, *[45])) fileD.create() fileD.addParent(inputFileB["lfn"]) jobGroupB.output.addFile(fileC) jobGroupB.output.addFile(fileD) mergeFilesetB.addFile(fileC) mergeFilesetB.addFile(fileD) mergeFilesetB.commit() splitter = SplitterFactory() jobFactory = splitter(package = "WMCore.WMBS", subscription = mergeSubscriptionB) result = jobFactory(min_merge_size = 1, max_merge_size = 20000, max_merge_events = 7169) assert len(result) == 0, \ "Error: No merge jobs should have been created." fileE = File(lfn = "fileE", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileE.addRun(Run(1, *[45])) fileE.create() fileE.addParent(inputFileA["lfn"]) fileF = File(lfn = "fileF", size = 1024, events = 1024, first_event = 0, locations = set(["somese.cern.ch"])) fileF.addRun(Run(1, *[45])) fileF.create() fileF.addParent(inputFileB["lfn"]) jobGroupB.output.addFile(fileE) jobGroupB.output.addFile(fileF) mergeFilesetB.addFile(fileE) mergeFilesetB.addFile(fileF) mergeFilesetB.commit() testJobD["outcome"] = "success" testJobD.save() testJobF["outcome"] = "success" testJobF.save() changeStateDAO.execute([testJobD, testJobF]) result = jobFactory(min_merge_size = 1, max_merge_size = 20000, max_merge_events = 7169) assert len(result) == 1, \ "Error: One merge job should have been created: %s" % len(result) return
def stuffWMBS(self, injected=True): """ _stuffWMBS_ Insert some dummy jobs, jobgroups, filesets, files and subscriptions into WMBS to test job creation. Three completed job groups each containing several files are injected. Another incomplete job group is also injected. Also files are added to the "Mergeable" subscription as well as to the output fileset for their jobgroups. """ locationAction = self.daoFactory(classname="Locations.New") locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN") locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN") changeStateDAO = self.daoFactory(classname="Jobs.ChangeState") self.mergeFileset = Fileset(name="mergeFileset") self.mergeFileset.create() self.bogusFileset = Fileset(name="bogusFileset") self.bogusFileset.create() self.mergeMergedFileset = Fileset(name="mergeMergedFileset") self.mergeMergedFileset.create() self.bogusMergedFileset = Fileset(name="bogusMergedFileset") self.bogusMergedFileset.create() mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2", owner="Steve", task="Test") mergeWorkflow.create() markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows") markWorkflow.execute(names=[mergeWorkflow.name], injected=injected) self.mergeSubscription = Subscription(fileset=self.mergeFileset, workflow=mergeWorkflow, split_algo="WMBSMergeBySize") self.mergeSubscription.create() self.bogusSubscription = Subscription(fileset=self.bogusFileset, workflow=mergeWorkflow, split_algo="WMBSMergeBySize") inputFileset = Fileset(name="inputFileset") inputFileset.create() inputWorkflow = Workflow(name="inputWorkflow", spec="input", owner="Steve", task="Test") inputWorkflow.create() inputWorkflow.addOutput("output", self.mergeFileset, self.mergeMergedFileset) inputWorkflow.addOutput("output2", self.bogusFileset, self.bogusMergedFileset) bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input", owner="Steve", task="Test") bogusInputWorkflow.create() inputSubscription = Subscription(fileset=inputFileset, workflow=inputWorkflow) inputSubscription.create() bogusInputSubscription = Subscription(fileset=inputFileset, workflow=bogusInputWorkflow) bogusInputSubscription.create() parentFile1 = File(lfn="parentFile1") parentFile1.create() parentFile2 = File(lfn="parentFile2") parentFile2.create() parentFile3 = File(lfn="parentFile3") parentFile3.create() parentFile4 = File(lfn="parentFile4") parentFile4.create() self.parentFileSite2 = File(lfn="parentFileSite2") self.parentFileSite2.create() jobGroup1 = JobGroup(subscription=inputSubscription) jobGroup1.create() jobGroup2 = JobGroup(subscription=inputSubscription) jobGroup2.create() jobGroup3 = JobGroup(subscription=bogusInputSubscription) jobGroup3.create() testJob1 = Job() testJob1.addFile(parentFile1) testJob1.create(jobGroup1) testJob1["state"] = "cleanout" testJob1["oldstate"] = "new" testJob1["couch_record"] = "somejive" testJob1["retry_count"] = 0 testJob1["outcome"] = "success" testJob1.save() changeStateDAO.execute([testJob1]) testJob1A = Job() testJob1A.addFile(parentFile1) testJob1A.create(jobGroup3) testJob1A["state"] = "cleanout" testJob1A["oldstate"] = "new" testJob1A["couch_record"] = "somejive" testJob1A["retry_count"] = 0 testJob1A["outcome"] = "failure" testJob1A.save() changeStateDAO.execute([testJob1A]) testJob2 = Job() testJob2.addFile(parentFile2) testJob2.create(jobGroup1) testJob2["state"] = "cleanout" testJob2["oldstate"] = "new" testJob2["couch_record"] = "somejive" testJob2["retry_count"] = 0 testJob2["outcome"] = "success" testJob2.save() 
changeStateDAO.execute([testJob2]) testJob3 = Job() testJob3.addFile(parentFile3) testJob3.create(jobGroup2) testJob3["state"] = "cleanout" testJob3["oldstate"] = "new" testJob3["couch_record"] = "somejive" testJob3["retry_count"] = 0 testJob3["outcome"] = "success" testJob3.save() changeStateDAO.execute([testJob3]) testJob4 = Job() testJob4.addFile(parentFile4) testJob4.create(jobGroup2) testJob4["state"] = "cleanout" testJob4["oldstate"] = "new" testJob4["couch_record"] = "somejive" testJob4["retry_count"] = 0 testJob4["outcome"] = "failure" testJob4.save() changeStateDAO.execute([testJob4]) # We'll simulate a failed split by event job that the merger should # ignore. parentFile5 = File(lfn="parentFile5") parentFile5.create() testJob5 = Job() testJob5.addFile(parentFile5) testJob5.create(jobGroup2) testJob5["state"] = "cleanout" testJob5["oldstate"] = "new" testJob5["couch_record"] = "somejive" testJob5["retry_count"] = 0 testJob5["outcome"] = "success" testJob5.save() changeStateDAO.execute([testJob5]) testJob6 = Job() testJob6.addFile(parentFile5) testJob6.create(jobGroup2) testJob6["state"] = "cleanout" testJob6["oldstate"] = "new" testJob6["couch_record"] = "somejive" testJob6["retry_count"] = 0 testJob6["outcome"] = "failure" testJob6.save() changeStateDAO.execute([testJob6]) testJob7 = Job() testJob7.addFile(self.parentFileSite2) testJob7.create(jobGroup2) testJob7["state"] = "cleanout" testJob7["oldstate"] = "new" testJob7["couch_record"] = "somejive" testJob7["retry_count"] = 0 testJob7["outcome"] = "success" testJob7.save() changeStateDAO.execute([testJob7]) badFile1 = File(lfn="badFile1", size=10241024, events=10241024, first_event=0, locations={"T2_CH_CERN"}) badFile1.addRun(Run(1, *[45])) badFile1.create() badFile1.addParent(parentFile5["lfn"]) file1 = File(lfn="file1", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) file1.addRun(Run(1, *[45])) file1.create() file1.addParent(parentFile1["lfn"]) file2 = File(lfn="file2", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) file2.addRun(Run(1, *[45])) file2.create() file2.addParent(parentFile1["lfn"]) file3 = File(lfn="file3", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) file3.addRun(Run(1, *[45])) file3.create() file3.addParent(parentFile1["lfn"]) file4 = File(lfn="file4", size=1024, events=1024, first_event=3072, locations={"T2_CH_CERN"}) file4.addRun(Run(1, *[45])) file4.create() file4.addParent(parentFile1["lfn"]) fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileA.addRun(Run(1, *[46])) fileA.create() fileA.addParent(parentFile2["lfn"]) fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileB.addRun(Run(1, *[46])) fileB.create() fileB.addParent(parentFile2["lfn"]) fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileC.addRun(Run(1, *[46])) fileC.create() fileC.addParent(parentFile2["lfn"]) fileI = File(lfn="fileI", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileI.addRun(Run(2, *[46])) fileI.create() fileI.addParent(parentFile3["lfn"]) fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileII.addRun(Run(2, *[46])) fileII.create() fileII.addParent(parentFile3["lfn"]) fileIII = File(lfn="fileIII", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileIII.addRun(Run(2, *[46])) fileIII.create() fileIII.addParent(parentFile3["lfn"]) fileIV = File(lfn="fileIV", 
size=1024, events=1024, first_event=3072, locations={"T2_CH_CERN"}) fileIV.addRun(Run(2, *[46])) fileIV.create() fileIV.addParent(parentFile3["lfn"]) fileX = File(lfn="badFileA", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileX.addRun(Run(1, *[47])) fileX.create() fileX.addParent(parentFile4["lfn"]) fileY = File(lfn="badFileB", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileY.addRun(Run(1, *[47])) fileY.create() fileY.addParent(parentFile4["lfn"]) fileZ = File(lfn="badFileC", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileZ.addRun(Run(1, *[47])) fileZ.create() fileZ.addParent(parentFile4["lfn"]) jobGroup1.output.addFile(file1) jobGroup1.output.addFile(file2) jobGroup1.output.addFile(file3) jobGroup1.output.addFile(file4) jobGroup1.output.addFile(fileA) jobGroup1.output.addFile(fileB) jobGroup1.output.addFile(fileC) jobGroup1.output.commit() jobGroup2.output.addFile(fileI) jobGroup2.output.addFile(fileII) jobGroup2.output.addFile(fileIII) jobGroup2.output.addFile(fileIV) jobGroup2.output.addFile(fileX) jobGroup2.output.addFile(fileY) jobGroup2.output.addFile(fileZ) jobGroup2.output.addFile(badFile1) jobGroup2.output.commit() for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]: self.mergeFileset.addFile(fileObj) self.bogusFileset.addFile(fileObj) self.mergeFileset.commit() self.bogusFileset.commit() return
def testListRunningJobs(self): """ _testListRunningJobs_ Test the ListRunningJobs DAO. """ testWorkflow = Workflow(spec=makeUUID(), owner="Steve", name=makeUUID(), task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJobA = Job(name=makeUUID(), files=[]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroup) testJobA["state"] = "executing" testJobB = Job(name=makeUUID(), files=[]) testJobB["couch_record"] = makeUUID() testJobB.create(group=testJobGroup) testJobB["state"] = "complete" testJobC = Job(name=makeUUID(), files=[]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroup) testJobC["state"] = "new" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[testJobA, testJobB, testJobC]) runningJobsAction = self.daoFactory( classname="Monitoring.ListRunningJobs") runningJobs = runningJobsAction.execute() assert len(runningJobs) == 2, \ "Error: Wrong number of running jobs returned." for runningJob in runningJobs: if runningJob["job_name"] == testJobA["name"]: assert runningJob["state"] == testJobA["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobA["couch_record"], \ "Error: Running job has wrong couch record." else: assert runningJob["job_name"] == testJobC["name"], \ "Error: Running job has wrong name." assert runningJob["state"] == testJobC["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobC["couch_record"], \ "Error: Running job has wrong couch record." return
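# Worth calling out from the test above: assigning testJob["state"] only
# mutates the in-memory dict; the Jobs.ChangeState DAO is what persists the
# transition, and Monitoring.ListRunningJobs reads only persisted state. A
# sketch of that two-step pattern; persistState is a hypothetical helper and
# daoFactory is assumed to be built as in the setUp methods in this file.
def persistState(daoFactory, jobs, newState):
    """Set and persist a state for a batch of WMBS jobs (sketch)."""
    for job in jobs:
        job["state"] = newState
    daoFactory(classname="Jobs.ChangeState").execute(jobs=jobs)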
def testGetOutputParentLFNs(self): """ _testGetOutputParentLFNs_ Verify that the getOutputDBSParentLFNs() method returns the correct parent LFNs. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10, merged=True) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10, merged=True) testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10, merged=False) testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10, merged=False) testFileE = File(lfn="/this/is/a/lfnE", size=1024, events=10, merged=True) testFileF = File(lfn="/this/is/a/lfnF", size=1024, events=10, merged=True) testFileA.create() testFileB.create() testFileC.create() testFileD.create() testFileE.create() testFileF.create() testFileE.addChild(testFileC["lfn"]) testFileF.addChild(testFileD["lfn"]) testJobA = Job(name="TestJob", files=[testFileA, testFileB]) testJobA["couch_record"] = "somecouchrecord" testJobA["location"] = "test.site.ch" testJobA.create(group=testJobGroup) testJobA.associateFiles() testJobB = Job(name="TestJobB", files=[testFileC, testFileD]) testJobB["couch_record"] = "somecouchrecord" testJobB["location"] = "test.site.ch" testJobB.create(group=testJobGroup) testJobB.associateFiles() goldenLFNs = ["/this/is/a/lfnA", "/this/is/a/lfnB"] parentLFNs = testJobA.getOutputDBSParentLFNs() for parentLFN in parentLFNs: assert parentLFN in goldenLFNs, \ "ERROR: Unknown lfn: %s" % parentLFN goldenLFNs.remove(parentLFN) assert not goldenLFNs, "ERROR: LFNs are missing: %s" % goldenLFNs goldenLFNs = ["/this/is/a/lfnE", "/this/is/a/lfnF"] parentLFNs = testJobB.getOutputDBSParentLFNs() for parentLFN in parentLFNs: assert parentLFN in goldenLFNs, \ "ERROR: Unknown lfn: %s" % parentLFN goldenLFNs.remove(parentLFN) assert not goldenLFNs, "ERROR: LFNs are missing..." return
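# Restating the rule the assertions verify: for DBS parentage, a job's output
# parents are its merged input files, and for an unmerged input file the
# lookup climbs to that file's own parents instead. A sketch of the expected
# set, assuming each file dict exposes "merged", "lfn" and a loaded "parents"
# list; this mirrors the fixtures above, not the WMBS internals.
def expectedDBSParentLFNs(inputFiles):
    """Compute the LFN set getOutputDBSParentLFNs() should return (sketch)."""
    lfns = set()
    for wmbsFile in inputFiles:
        if wmbsFile["merged"]:
            lfns.add(wmbsFile["lfn"])
        else:
            lfns.update(parent["lfn"] for parent in wmbsFile["parents"])
    return lfns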
def testCompleteJobInput(self): """ _testCompleteJobInput_ Verify the correct output of the CompleteInput DAO. This should mark the input for a job as complete once all the jobs that run over a particular file have completed successfully. """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve", name="wf002", task="Test") testWorkflow.create() bogusWorkflow.create() testFileset = Fileset(name="TestFileset") bogusFileset = Fileset(name="BogusFileset") testFileset.create() bogusFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) bogusSubscription = Subscription(fileset=bogusFileset, workflow=bogusWorkflow) testSubscription.create() bogusSubscription.create() testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileA.create() testFileB.create() testFileC.create() testFileset.addFile([testFileA, testFileB, testFileC]) bogusFileset.addFile([testFileA, testFileB, testFileC]) testFileset.commit() bogusFileset.commit() testSubscription.acquireFiles([testFileA, testFileB, testFileC]) bogusSubscription.acquireFiles([testFileA, testFileB, testFileC]) testJobGroup = JobGroup(subscription=testSubscription) bogusJobGroup = JobGroup(subscription=bogusSubscription) testJobGroup.create() bogusJobGroup.create() testJobA = Job(name="TestJobA", files=[testFileA]) testJobB = Job(name="TestJobB", files=[testFileA, testFileB]) testJobC = Job(name="TestJobC", files=[testFileC]) bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC]) testJobA.create(group=testJobGroup) testJobB.create(group=testJobGroup) testJobC.create(group=testJobGroup) bogusJob.create(group=bogusJobGroup) testJobA["outcome"] = "success" testJobB["outcome"] = "failure" testJobC["outcome"] = "success" testJobA.save() testJobB.save() testJobC.save() testJobA.completeInputFiles() compFiles = len(testSubscription.filesOfStatus("Completed")) assert compFiles == 0, \ "Error: test sub has wrong number of complete files: %s" % compFiles testJobB["outcome"] = "success" testJobB.save() testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]]) availFiles = len(testSubscription.filesOfStatus("Available")) assert availFiles == 0, \ "Error: test sub has wrong number of available files: %s" % availFiles acqFiles = len(testSubscription.filesOfStatus("Acquired")) assert acqFiles == 1, \ "Error: test sub has wrong number of acquired files: %s" % acqFiles compFiles = len(testSubscription.filesOfStatus("Completed")) assert compFiles == 1, \ "Error: test sub has wrong number of complete files: %s" % compFiles failFiles = len(testSubscription.filesOfStatus("Failed")) assert failFiles == 1, \ "Error: test sub has wrong number of failed files: %s" % failFiles availFiles = len(bogusSubscription.filesOfStatus("Available")) assert availFiles == 0, \ "Error: bogus sub has wrong number of available files: %s" % availFiles acqFiles = len(bogusSubscription.filesOfStatus("Acquired")) assert acqFiles == 3, \ "Error: bogus sub has wrong number of acquired files: %s" % acqFiles compFiles = len(bogusSubscription.filesOfStatus("Completed")) assert compFiles == 0, \ "Error: bogus sub has wrong number of complete files: %s" % compFiles failFiles = len(bogusSubscription.filesOfStatus("Failed")) assert failFiles == 0, \ "Error: bogus sub has wrong number of failed files: %s" % failFiles return
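# The skipFiles behaviour is the subtle part of the test above:
# completeInputFiles() completes a file only once every job that reads it has
# succeeded, while any LFN passed via skipFiles is routed to Failed instead,
# as the assertions imply. A hypothetical wrapper showing the call shape:
def completeJobSkippingBadFiles(job, badLFNs):
    """Mark a job successful and complete its input, failing badLFNs (sketch)."""
    job["outcome"] = "success"
    job.save()
    job.completeInputFiles(skipFiles=badLFNs)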
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=[ "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl" ], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) 
procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual( list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() 
skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, seName=self.siteDB.cmsNametoSE(site)[0]) self.ses.append(self.siteDB.cmsNametoSE(site)[0]) def createWMSpec(self, name='ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites=[], nonCustodialSites=[], autoApproveSites=[], priority="Low", custodialSubType="Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config=config) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. 
""" resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] 
mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscriptions for the multiple top-level tasks (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong
owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = 
self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [ '123454321' ] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [ '123,123' ] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. 
numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. Check whether parent files are created in WMBS. 2. Check that parent files are associated to their children. 3. When two specs with the same input data are inserted (one with parent processing, one without), and the one without parent processing is inserted first, the spec with parent processing still needs to create the parent files even though the child files are duplicates. """ block = self.dataset + "#1" wmbs, sub, numFiles = self.createWMBSHelperWithTopTask( self.wmspec, block, parentFlag=False, detail=True) # file creation without parents self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # no parent per child self.assertEqual(len(child["parents"]), 0) wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # one parent per child self.assertEqual(len(child["parents"]), 1) def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(self.ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
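# The assertions in testMCFakeFileInjection treat both the event and lumi
# ranges of the Mask as inclusive. A minimal sketch of that arithmetic,
# using only the Mask values already defined in the test:
#
#   nEvents = mask['LastEvent'] - mask['FirstEvent'] + 1  # 999995 - 12345 + 1 = 987651
#   nLumis = mask['LastLumi'] - mask['FirstLumi'] + 1     # 12345 - 1234 + 1 = 11112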
def testGetOutputMapDAO(self): """ _testGetOutputMapDAO_ Verify the proper behavior of the GetOutputMapDAO for a variety of different processing chains. """ recoOutputFileset = Fileset(name="RECO") recoOutputFileset.create() mergedRecoOutputFileset = Fileset(name="MergedRECO") mergedRecoOutputFileset.create() alcaOutputFileset = Fileset(name="ALCA") alcaOutputFileset.create() mergedAlcaOutputFileset = Fileset(name="MergedALCA") mergedAlcaOutputFileset.create() dqmOutputFileset = Fileset(name="DQM") dqmOutputFileset.create() mergedDqmOutputFileset = Fileset(name="MergedDQM") mergedDqmOutputFileset.create() cleanupFileset = Fileset(name="Cleanup") cleanupFileset.create() testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="None") testWorkflow.create() testWorkflow.addOutput("output", recoOutputFileset, mergedRecoOutputFileset) testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset, mergedAlcaOutputFileset) testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset) testWorkflow.addOutput("output", cleanupFileset) testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset) testWorkflow.addOutput("DQM", cleanupFileset) testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve", name="TestRecoMergeWF", task="None") testRecoMergeWorkflow.create() testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset, mergedRecoOutputFileset) testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve", name="TestRecoProcWF", task="None") testRecoProcWorkflow.create() testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve", name="TestAlcaChildWF", task="None") testAlcaChildWorkflow.create() inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000, locations="cmssrm.fnal.gov") inputFile.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(inputFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") testMergeRecoSubscription = Subscription( fileset=recoOutputFileset, workflow=testRecoMergeWorkflow, split_algo="WMBSMergeBySize", type="Merge") testProcRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoProcWorkflow, split_algo="FileBased", type="Processing") testChildAlcaSubscription = Subscription( fileset=alcaOutputFileset, workflow=testAlcaChildWorkflow, split_algo="FileBased", type="Processing") testSubscription.create() testMergeRecoSubscription.create() testProcRecoSubscription.create() testChildAlcaSubscription.create() testSubscription.acquireFiles() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="SplitJobA", files=[inputFile]) testJob.create(group=testJobGroup) testJob["state"] = "complete" testJob.save() outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap") outputMap = outputMapAction.execute(jobID=testJob["id"]) assert len(outputMap.keys()) == 3, \ "Error: Wrong number of outputs for primary workflow." 
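# Expected (unmerged fileset ID, merged fileset ID) pair for each output
# module label; cleanup entries are handled separately in the loop below.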
goldenMap = { "output": (recoOutputFileset.id, mergedRecoOutputFileset.id), "ALCARECOStreamCombined": (alcaOutputFileset.id, mergedAlcaOutputFileset.id), "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id) } for outputID in outputMap.keys(): for outputFilesets in outputMap[outputID]: if outputFilesets["merged_output_fileset"] is None: self.assertEqual(outputFilesets["output_fileset"], cleanupFileset.id, "Error: Cleanup fileset is wrong.") continue self.assertTrue(outputID in goldenMap.keys(), "Error: Output identifier is missing.") self.assertEqual(outputFilesets["output_fileset"], goldenMap[outputID][0], "Error: Output fileset is wrong.") self.assertEqual(outputFilesets["merged_output_fileset"], goldenMap[outputID][1], "Error: Merged output fileset is wrong.") del goldenMap[outputID] self.assertEqual(len(goldenMap.keys()), 0, "Error: Missing output maps.") return
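# For reference, Jobs.GetOutputMap (as exercised above) keys the map by
# output module label, with each value a list of fileset pairs; cleanup
# associations appear as entries whose merged_output_fileset is None.
# A hypothetical illustration with made-up fileset IDs:
#
#   outputMap = {"output": [{"output_fileset": 1, "merged_output_fileset": 2},
#                           {"output_fileset": 7, "merged_output_fileset": None}]}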
def createJobs(self): """ _createJobs_ Create test jobs in WMBS and BossAir """ testWorkflow = Workflow(spec=makeUUID(), owner="tapas", name=makeUUID(), task="Test") testWorkflow.create() testFilesetA = Fileset(name="TestFilesetA") testFilesetA.create() testFilesetB = Fileset(name="TestFilesetB") testFilesetB.create() testFilesetC = Fileset(name="TestFilesetC") testFilesetC.create() testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"])) testFileA.create() testFilesetA.addFile(testFileA) testFilesetA.commit() testFilesetB.addFile(testFileA) testFilesetB.commit() testFilesetC.addFile(testFileA) testFilesetC.commit() testSubscriptionA = Subscription(fileset=testFilesetA, workflow=testWorkflow, type="Processing") testSubscriptionA.create() testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1", "valid": True}]) testSubscriptionB = Subscription(fileset=testFilesetB, workflow=testWorkflow, type="Processing") testSubscriptionB.create() testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1", "valid": False}]) testSubscriptionC = Subscription(fileset=testFilesetC, workflow=testWorkflow, type="Merge") testSubscriptionC.create() testJobGroupA = JobGroup(subscription=testSubscriptionA) testJobGroupA.create() testJobGroupB = JobGroup(subscription=testSubscriptionB) testJobGroupB.create() testJobGroupC = JobGroup(subscription=testSubscriptionC) testJobGroupC.create() # Site1, Has been assigned a location and is complete. testJobA = Job(name="testJobA", files=[testFileA]) testJobA["couch_record"] = makeUUID() testJobA.create(group=testJobGroupA) testJobA["state"] = "success" # Site 1, Has been assigned a location and is incomplete. testJobB = Job(name="testJobB", files=[testFileA]) testJobB["couch_record"] = makeUUID() testJobB["cache_dir"] = self.tempDir testJobB.create(group=testJobGroupA) testJobB["state"] = "executing" runJobB = RunJob() runJobB.buildFromJob(testJobB) runJobB["status"] = "PEND" # Does not have a location, white listed to site 1 testJobC = Job(name="testJobC", files=[testFileA]) testJobC["couch_record"] = makeUUID() testJobC.create(group=testJobGroupA) testJobC["state"] = "new" # Site 2, Has been assigned a location and is complete. testJobD = Job(name="testJobD", files=[testFileA]) testJobD["couch_record"] = makeUUID() testJobD.create(group=testJobGroupB) testJobD["state"] = "success" # Site 2, Has been assigned a location and is incomplete. testJobE = Job(name="testJobE", files=[testFileA]) testJobE["couch_record"] = makeUUID() testJobE.create(group=testJobGroupB) testJobE["state"] = "executing" runJobE = RunJob() runJobE.buildFromJob(testJobE) runJobE["status"] = "RUN" # Does not have a location, site 1 is blacklisted. testJobF = Job(name="testJobF", files=[testFileA]) testJobF["couch_record"] = makeUUID() testJobF.create(group=testJobGroupB) testJobF["state"] = "new" # Site 3, Has been assigned a location and is complete. testJobG = Job(name="testJobG", files=[testFileA]) testJobG["couch_record"] = makeUUID() testJobG.create(group=testJobGroupC) testJobG["state"] = "cleanout" # Site 3, Has been assigned a location and is incomplete. testJobH = Job(name="testJobH", files=[testFileA]) testJobH["couch_record"] = makeUUID() testJobH.create(group=testJobGroupC) testJobH["state"] = "new" # Site 3, Does not have a location. testJobI = Job(name="testJobI", files=[testFileA]) testJobI["couch_record"] = makeUUID() testJobI.create(group=testJobGroupC) testJobI["state"] = "new" # Site 3, Does not have a location and is in cleanout. 
testJobJ = Job(name="testJobJ", files=[testFileA]) testJobJ["couch_record"] = makeUUID() testJobJ.create(group=testJobGroupC) testJobJ["state"] = "cleanout" changeStateAction = self.daoFactory(classname="Jobs.ChangeState") changeStateAction.execute(jobs=[testJobA, testJobB, testJobC, testJobD, testJobE, testJobF, testJobG, testJobH, testJobI, testJobJ]) self.insertRunJob.execute([runJobB, runJobE]) setLocationAction = self.daoFactory(classname="Jobs.SetLocation") setLocationAction.execute(testJobA["id"], "testSite1") setLocationAction.execute(testJobB["id"], "testSite1") setLocationAction.execute(testJobD["id"], "testSite1") setLocationAction.execute(testJobE["id"], "testSite2") setLocationAction.execute(testJobG["id"], "testSite1") setLocationAction.execute(testJobH["id"], "testSite1") return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBSBuffer.Database", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault = False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = threading.currentThread().logger, dbinterface = threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI = None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daoFactory(classname = "Locations.New") changeStateAction = daoFactory(classname = "Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000) inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations = "goodse.cern.ch") inputFileB = File("lfnB", locations = "goodse.cern.ch") inputFileC = File("lfnC", locations = "goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations = "goodse.cern.ch") unmergedFileB = File("ulfnB", locations = "goodse.cern.ch") unmergedFileC = File("ulfnC", locations = "goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset = inputFileset, workflow = mainProcWorkflow, type = "Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription = self.mainProcSub) procJobGroup.create() self.procJobA = Job(name = "ProcJobA") self.procJobA["state"] = "new" 
self.procJobA["location"] = "site1" self.procJobB = Job(name = "ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name = "ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset = unmergedOutputFileset, workflow = mainProcMergeWorkflow, type = "Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription = self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name = "MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name = "MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name = "MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset, workflow = mainCleanupWorkflow, type = "Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name = "CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name = "CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name = "CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = 'Steve' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs = jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec = "spec2", owner = "Steve", name = "Bogus", task = "Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset = bogusFileset, workflow = bogusWorkflow, type = "Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config = config) # Create nine jobs self.setupForKillTest(baAPI = baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job = 1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1, max_merge_size = 2, max_merge_events = 3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBase", files_per_job = 50) cleanupTaskCMSSW = 
cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1, include_parents = True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) return testWorkload def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong 
number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 
'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir) testResubmitWMBSHelper.createSubscription() mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") 
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return

    def setupMCWMSpec(self):
        """Setup MC workflow"""
        self.wmspec = self.createMCWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = None
        self.siteDB = fakeSiteDB()

    def createWMSpec(self, name = 'ReRecoWorkload'):
        wmspec = rerecoWorkload(name, rerecoArgs)
        wmspec.setSpecUrl("/path/to/workload")
        return wmspec

    def createMCWMSpec(self, name = 'MonteCarloWorkload'):
        wmspec = monteCarloWorkload(name, mcArgs)
        wmspec.setSpecUrl("/path/to/workload")
        getFirstTask(wmspec).addProduction(totalevents = 10000)
        return wmspec

    def getDBS(self, wmspec):
        topLevelTask = getFirstTask(wmspec)
        inputDataset = topLevelTask.inputDataset()
        dbs = MockDBSReader(inputDataset.dbsurl)
        #dbsDict = {self.inputDataset.dbsurl : self.dbs}
        return dbs

    def createWMBSHelperWithTopTask(self, wmspec, block, mask = None):
        topLevelTask = getFirstTask(wmspec)
        wmbs = WMBSHelper(wmspec, block, mask, cachepath = self.workDir)
        if block:
            block = self.dbs.getFileBlock(block)[block]
        wmbs.createSubscriptionAndAddFiles(block = block)
        return wmbs

#    def testProduction(self):
#        """Production workflow"""
#        pass

    def testReReco(self):
        """ReReco workflow"""
        # create workflow
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block))
        self.assertEqual(len(files), 1)

    def testReRecoBlackRunRestriction(self):
        """ReReco workflow with Run restrictions"""
        block = self.dataset + "#2"
        # add run blacklist
        self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testReRecoWhiteRunRestriction(self):
        block = self.dataset + "#2"
        # Run Whitelist
        self.topLevelTask.setInputRunWhitelist([2])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock())

    def testDuplicateFileInsert(self):
        # using default wmspec
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        # check initially inserted files.
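        # The contract exercised below: addFiles() is idempotent, so
        # re-adding the same block under a second spec must report 0 newly
        # added files, while the new subscription still gets its own,
        # fully populated top-level fileset.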
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))

        firstFileset = wmbs.topLevelFileset
        wmbsDao = wmbs.daofactory(classname = "Files.InFileset")

        numOfFiles = len(wmbsDao.execute(firstFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        # use the new spec with the same input dataset
        block = self.dataset + "#1"
        wmspec = self.createWMSpec("TestSpec1")
        dbs = self.getDBS(wmspec)
        wmbs = self.createWMBSHelperWithTopTask(wmspec, block)

        # check duplicate insert
        dbsFiles = dbs.getFileBlock(block)[block]['Files']
        numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block])
        self.assertEqual(numOfFiles, 0)

        secondFileset = wmbs.topLevelFileset
        wmbsDao = wmbs.daofactory(classname = "Files.InFileset")

        numOfFiles = len(wmbsDao.execute(secondFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        self.assertNotEqual(firstFileset.id, secondFileset.id)

    def testParentage(self):
        """
        TODO: add the parentage test.
        1. Check whether parent files are created in WMBS.
        2. Check that parent files are associated to the child.
        3. When two specs with the same input data (one with parent
           processing, one without) are inserted, and the one without
           parent processing is inserted first, the second insert still
           needs to create the parent files even though the child files
           are duplicates.
        (A hedged sketch of checks 1 and 2 appears after
        testMCFakeFileInjection below.)
        """
        pass

    def testMCFakeFileInjection(self):
        """Inject fake Monte Carlo files into WMBS"""
        self.setupMCWMSpec()

        mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345,
                    LastEvent = 999995, LastLumi = 12345, LastRun = 12)

        # add sites that would normally be added by operator via resource_control
        locationDAO = self.daoFactory(classname = "Locations.New")
        ses = []
        for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
            locationDAO.execute(siteName = site, seName = self.siteDB.cmsNametoSE(site))
            ses.append(self.siteDB.cmsNametoSE(site))

        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        subscription = wmbs.topLevelSubscription
        self.assertEqual(1, subscription.exists())
        fileset = subscription['fileset']
        self.assertEqual(1, fileset.exists())

        fileset.loadData()  # need to refresh from database
        self.assertEqual(len(fileset.files), 1)
        self.assertEqual(len(fileset.parents), 0)
        self.assertFalse(fileset.open)

        mcFile = list(fileset.files)[0]
        self.assertEqual(mcFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1)  # inclusive range
        self.assertEqual(mcFile['merged'], False)  # merged files get added to dbs
        self.assertEqual(len(mcFile['parents']), 0)
        #mcFile.loadData()
        self.assertEqual(sorted(mcFile['locations']), sorted(ses))
        self.assertEqual(len(mcFile.getParentLFNs()), 0)

        self.assertEqual(len(mcFile.getRuns()), 1)
        run = mcFile.getRuns()[0]
        self.assertEqual(run.run, mask['FirstRun'])
        self.assertEqual(run.lumis[0], mask['FirstLumi'])
        self.assertEqual(run.lumis[-1], mask['LastLumi'])
        self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
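    # A hedged sketch of checks 1 and 2 from testParentage above. The names
    # here are assumptions: it presumes MockDBSReader publishes parent LFNs
    # for block "#1" and that the default spec enables parent processing, so
    # it stays commented out (like testProduction) until the fixtures
    # actually provide parentage.
    #
    # def testParentageSketch(self):
    #     block = self.dataset + "#1"
    #     wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
    #     wmbs.topLevelFileset.loadData()
    #     for childFile in wmbs.topLevelFileset.files:
    #         childFile.loadData(parentage = 1)
    #         for parentFile in childFile["parents"]:
    #             # 1. the parent file was created in WMBS ...
    #             self.assertTrue(parentFile.exists())
    #             # 2. ... and is associated to the child
    #             self.assertIn(parentFile["lfn"], childFile.getParentLFNs())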
class ReportIntegrationTest(unittest.TestCase):
    """
    _ReportIntegrationTest_
    """
    def setUp(self):
        """
        _setUp_

        Setup the database and WMBS for the test.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer",
                                                 "WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.dbsfactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        locationAction = self.daofactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", pnn = "T1_US_FNAL_Disk")

        inputFile = File(lfn = "/path/to/some/lfn", size = 10, events = 10,
                         locations = "T1_US_FNAL_Disk")
        inputFile.create()

        inputFileset = Fileset(name = "InputFileset")
        inputFileset.create()
        inputFileset.addFile(inputFile)
        inputFileset.commit()

        unmergedFileset = Fileset(name = "UnmergedFileset")
        unmergedFileset.create()

        mergedFileset = Fileset(name = "MergedFileset")
        mergedFileset.create()

        procWorkflow = Workflow(spec = "wf001.xml", owner = "Steve",
                                name = "TestWF", task = "/TestWF/None")
        procWorkflow.create()
        procWorkflow.addOutput("outputRECORECO", unmergedFileset)

        mergeWorkflow = Workflow(spec = "wf002.xml", owner = "Steve",
                                 name = "MergeWF", task = "/MergeWF/None")
        mergeWorkflow.create()
        mergeWorkflow.addOutput("Merged", mergedFileset)

        insertWorkflow = self.dbsfactory(classname = "InsertWorkflow")
        insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0)
        insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0)

        self.procSubscription = Subscription(fileset = inputFileset,
                                             workflow = procWorkflow,
                                             split_algo = "FileBased",
                                             type = "Processing")
        self.procSubscription.create()
        self.procSubscription.acquireFiles()

        self.mergeSubscription = Subscription(fileset = unmergedFileset,
                                              workflow = mergeWorkflow,
                                              split_algo = "WMBSMergeBySize",
                                              type = "Merge")
        self.mergeSubscription.create()

        self.procJobGroup = JobGroup(subscription = self.procSubscription)
        self.procJobGroup.create()
        self.mergeJobGroup = JobGroup(subscription = self.mergeSubscription)
        self.mergeJobGroup.create()

        self.testJob = Job(name = "testJob", files = [inputFile])
        self.testJob.create(group = self.procJobGroup)
        self.testJob["state"] = "complete"

        self.stateChangeAction = self.daofactory(classname = "Jobs.ChangeState")
        self.setFWJRAction = self.daofactory(classname = "Jobs.SetFWJRPath")
        self.getJobTypeAction = self.daofactory(classname = "Jobs.GetType")
        locationAction = self.daofactory(classname = "Locations.New")
        locationAction.execute(siteName = "cmssrm.fnal.gov")

        self.stateChangeAction.execute(jobs = [self.testJob])

        self.tempDir = tempfile.mkdtemp()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database and the pickled report file.
        """
        self.testInit.clearDatabase()

        try:
            os.remove(os.path.join(self.tempDir, "ProcReport.pkl"))
            os.remove(os.path.join(self.tempDir, "MergeReport.pkl"))
        except Exception:
            pass

        try:
            os.rmdir(self.tempDir)
        except Exception:
            pass

        return

    def createConfig(self, workerThreads):
        """
        _createConfig_

        Create a config for the JobAccountant with the given number of worker
        threads.  This config needs to include information for connecting to
        the database, as the component will create its own database
        connections.  These parameters are still pulled from the environment.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL")
        config.JobStateMachine.couchDBName = "report_integration_t"
        config.JobStateMachine.jobSummaryDBName = "report_integration_wmagent_summary_t"

        config.component_("JobAccountant")
        config.JobAccountant.pollInterval = 60
        config.JobAccountant.workerThreads = workerThreads
        config.JobAccountant.componentDir = os.getcwd()
        config.JobAccountant.logLevel = 'SQLDEBUG'

        config.component_("TaskArchiver")
        config.TaskArchiver.localWMStatsURL = "%s/%s" % (config.JobStateMachine.couchurl,
                                                         config.JobStateMachine.jobSummaryDBName)
        return config

    def verifyJobSuccess(self, jobID):
        """
        _verifyJobSuccess_

        Verify that the metadata for a successful job is correct.  This will
        check the outcome, retry count and state.
        """
        testJob = Job(id = jobID)
        testJob.load()

        assert testJob["state"] == "success", \
            "Error: test job in wrong state: %s" % testJob["state"]
        assert testJob["retry_count"] == 0, \
            "Error: test job has wrong retry count: %s" % testJob["retry_count"]
        assert testJob["outcome"] == "success", \
            "Error: test job has wrong outcome: %s" % testJob["outcome"]

        return

    def verifyFileMetaData(self, jobID, fwkJobReportFiles):
        """
        _verifyFileMetaData_

        Verify that all the files that were output by a job made it into WMBS
        correctly.  Compare the contents of WMBS to the files in the framework
        job report.

        Note that fwkJobReportFiles is a list of DataStructs File objects.
        """
        testJob = Job(id = jobID)
        testJob.loadData()

        inputLFNs = []
        for inputFile in testJob["input_files"]:
            inputLFNs.append(inputFile["lfn"])

        for fwkJobReportFile in fwkJobReportFiles:
            outputFile = File(lfn = fwkJobReportFile["lfn"])
            outputFile.loadData(parentage = 1)

            assert outputFile["events"] == int(fwkJobReportFile["events"]), \
                "Error: Output file has wrong events: %s, %s" % \
                (outputFile["events"], fwkJobReportFile["events"])
            assert outputFile["size"] == int(fwkJobReportFile["size"]), \
                "Error: Output file has wrong size: %s, %s" % \
                (outputFile["size"], fwkJobReportFile["size"])

            for ckType in fwkJobReportFile["checksums"]:
                assert ckType in outputFile["checksums"], \
                    "Error: Output file is missing checksums: %s" % ckType
                assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \
                    "Error: Checksums don't match."

            assert len(fwkJobReportFile["checksums"]) == len(outputFile["checksums"]), \
                "Error: Wrong number of checksums."

            jobType = self.getJobTypeAction.execute(jobID = jobID)
            if jobType == "Merge":
                assert str(outputFile["merged"]) == "True", \
                    "Error: Merge jobs should output merged files."
            else:
                assert outputFile["merged"] == fwkJobReportFile["merged"], \
                    "Error: Output file merged output is wrong: %s, %s" % \
                    (outputFile["merged"], fwkJobReportFile["merged"])

            assert len(outputFile["locations"]) == 1, \
                "Error: output file should have one location: %s" % outputFile["locations"]
            assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \
                "Error: wrong location for file."

            assert len(outputFile["parents"]) == len(inputLFNs), \
                "Error: Output file has wrong number of parents."
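            # The remaining checks walk the recorded parents and the run/lumi
            # content; fwjrRuns is drained as lumis are matched, so anything
            # left over at the end is a run/lumi the job report claimed but
            # the WMBS file lacks.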
            for outputParent in outputFile["parents"]:
                assert outputParent["lfn"] in inputLFNs, \
                    "Error: Unknown parent file: %s" % outputParent["lfn"]

            fwjrRuns = {}
            for run in fwkJobReportFile["runs"]:
                fwjrRuns[run.run] = run.lumis

            for run in outputFile["runs"]:
                assert run.run in fwjrRuns, \
                    "Error: Extra run in output: %s" % run.run

                for lumi in run:
                    assert lumi in fwjrRuns[run.run], \
                        "Error: Extra lumi: %s" % lumi
                    fwjrRuns[run.run].remove(lumi)

                if len(fwjrRuns[run.run]) == 0:
                    del fwjrRuns[run.run]

            assert len(fwjrRuns) == 0, \
                "Error: Missing runs, lumis: %s" % fwjrRuns

            testJobGroup = JobGroup(id = testJob["jobgroup"])
            testJobGroup.loadData()
            jobGroupFileset = testJobGroup.output
            jobGroupFileset.loadData()

            assert outputFile["id"] in jobGroupFileset.getFiles(type = "id"), \
                "Error: output file not in jobgroup fileset."

            if testJob["mask"]["FirstEvent"] is None:
                assert outputFile["first_event"] == 0, \
                    "Error: first event not set correctly: 0, %s" % \
                    outputFile["first_event"]
            else:
                assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \
                    "Error: first event not set correctly: %s, %s" % \
                    (testJob["mask"]["FirstEvent"], outputFile["first_event"])

        return

    def testReportHandling(self):
        """
        _testReportHandling_

        Verify that we're able to parse a CMSSW report, convert it to a
        Report() style report, pickle it and then have the accountant
        process it.
        """
        self.procPath = os.path.join(WMCore.WMBase.getTestBase(),
                                     "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(self.procPath)

        # Fake some metadata that should be added by the stageout scripts.
        for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"

        fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
        cmsRunStep = myReport.retrieveStep("cmsRun1")
        cmsRunStep.status = 0
        myReport.setTaskName('/TestWF/None')
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID = self.testJob["id"], fwjrPath = fwjrPath)

        pFile = DBSBufferFile(lfn = "/path/to/some/lfn", size = 600000, events = 60000)
        pFile.setAlgorithm(appName = "cmsRun", appVer = "UNKNOWN",
                           appFam = "RECO", psetHash = "GIBBERISH",
                           configContent = "MOREGIBBERISH")
        pFile.setDatasetPath("/bogus/dataset/path")
        #pFile.addRun(Run(1, *[45]))
        pFile.create()

        config = self.createConfig(workerThreads = 1)
        accountant = JobAccountantPoller(config)
        accountant.setup()
        accountant.algorithm()

        self.verifyJobSuccess(self.testJob["id"])
        self.verifyFileMetaData(self.testJob["id"],
                                myReport.getAllFilesFromStep("cmsRun1"))

        inputFile = File(lfn = "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root")
        inputFile.load()
        self.testMergeJob = Job(name = "testMergeJob", files = [inputFile])
        self.testMergeJob.create(group = self.mergeJobGroup)
        self.testMergeJob["state"] = "complete"
        self.stateChangeAction.execute(jobs = [self.testMergeJob])

        self.mergePath = os.path.join(WMCore.WMBase.getTestBase(),
                                      "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

        myReport = Report("mergeReco")
        myReport.parse(self.mergePath)

        # Fake some metadata that should be added by the stageout scripts.
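        # (The dataset dict below mimics what the merge output module would
        # normally report; size and location would come from stageout.)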
        for fileRef in myReport.getAllFileRefsFromStep("mergeReco"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"
            fileRef.dataset = {"applicationName": "cmsRun",
                               "applicationVersion": "CMSSW_3_4_2_patch1",
                               "primaryDataset": "MinimumBias",
                               "processedDataset": "Rereco-v1",
                               "dataTier": "RECO"}

        fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
        myReport.setTaskName('/MergeWF/None')
        cmsRunStep = myReport.retrieveStep("mergeReco")
        cmsRunStep.status = 0
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID = self.testMergeJob["id"], fwjrPath = fwjrPath)
        accountant.algorithm()

        self.verifyJobSuccess(self.testMergeJob["id"])
        self.verifyFileMetaData(self.testMergeJob["id"],
                                myReport.getAllFilesFromStep("mergeReco"))
        return
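
# A minimal sketch of the parse -> persist round-trip that testReportHandling
# drives, assuming Report.unpersist() is the pickle-loading counterpart of
# Report.persist() (verify against the WMCore.FwkJobReport.Report API before
# relying on this):
#
#     report = Report("cmsRun1")
#     report.parse(xmlReportPath)   # XML framework job report -> Report()
#     report.persist(pklPath)       # pickle the Report() to disk
#     restored = Report("cmsRun1")
#     restored.unpersist(pklPath)   # load the pickled report back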