def testA_StraightThrough(self):
    """
    _StraightThrough_

    Run the full agent component chain straight through with no variations:
    JobCreator -> JobSubmitter -> JobTracker -> JobAccountant -> JobArchiver
    -> TaskArchiver, asserting the WMBS job-state transitions at each step.

    Requires an empty condor queue for the submitting user; aborts up front
    if jobs are already running.
    """
    # Do pre-submit job check: the condor-queue assertions below are only
    # meaningful if we start from zero running jobs.
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    myThread = threading.currentThread()
    # createTestWorkload() writes the workload pickle to disk as a side
    # effect; the return value itself is not used in this test.
    workload = self.createTestWorkload()
    config = self.getConfig()

    name = 'WMAgent_Test1'
    site = self.sites[0]
    nSubs = 5
    nFiles = 10
    workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload',
                                'WMSandbox', 'WMWorkload.pkl')

    # Create a collection of files for the workflow to process
    self.createFileCollection(name = name, nSubs = nSubs, nFiles = nFiles,
                              workflowURL = workloadPath, site = site)

    ############################################################
    # Test the JobCreator

    config.Agent.componentName = 'JobCreator'
    testJobCreator = JobCreatorPoller(config = config)

    testJobCreator.algorithm()
    time.sleep(5)

    # Did all jobs get created?
    getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), nSubs*nFiles)

    # Count database objects: every input file should now be acquired
    result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
    self.assertEqual(len(result), nSubs * nFiles)

    # Find the test directory
    testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
    self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
    self.assertTrue(len(os.listdir(testDirectory)) <= 20)

    groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

    # First job should be in here
    self.assertTrue('job_1' in os.listdir(groupDirectory))
    jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
    self.assertTrue(os.path.isfile(jobFile))
    # Pickles must be read in binary mode ('rb', not 'r'); the context
    # manager guarantees the handle is closed even if unpickling fails.
    with open(jobFile, 'rb') as f:
        job = cPickle.load(f)
    self.assertEqual(job['workflow'], name)
    self.assertEqual(len(job['input_files']), 1)
    self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')

    ###############################################################
    # Now test the JobSubmitter

    config.Agent.componentName = 'JobSubmitter'
    testJobSubmitter = JobSubmitterPoller(config = config)

    testJobSubmitter.algorithm()

    # Check that jobs are in the right state
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # Check assigned locations ('jobId' rather than 'id' to avoid
    # shadowing the builtin)
    getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
    for jobId in result:
        loc = getLocationAction.execute(jobid = jobId)
        self.assertEqual(loc, [[site]])

    # Check to make sure we have running jobs
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, nFiles * nSubs)

    #################################################################
    # Now the JobTracker

    config.Agent.componentName = 'JobTracker'
    testJobTracker = JobTrackerPoller(config = config)
    testJobTracker.setup()

    testJobTracker.algorithm()

    # Running the algo without removing the jobs should do nothing
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # Purge the condor queue so the tracker sees the jobs as gone
    condorRM()
    time.sleep(1)

    # All jobs gone?
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0)

    testJobTracker.algorithm()
    time.sleep(5)

    # Now that the jobs have vanished from condor, the tracker should
    # have moved them all from Executing to Complete
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #################################################################
    # Now the JobAccountant

    # First you need to load all jobs
    self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
    completeJobs = self.getFWJRAction.execute(state = "complete")

    # Create reports for all jobs
    self.createReports(jobs = completeJobs, retryCount = 0)

    config.Agent.componentName = 'JobAccountant'
    testJobAccountant = JobAccountantPoller(config = config)
    testJobAccountant.setup()

    # It should do something with the jobs
    testJobAccountant.algorithm()

    # All the jobs should be done now
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #######################################################################
    # Now the JobArchiver

    config.Agent.componentName = 'JobArchiver'
    testJobArchiver = JobArchiverPoller(config = config)

    testJobArchiver.algorithm()

    # All the jobs should be cleaned up
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # The FWJRs should be gone and each job tarred up into its cluster dir
    logDir = os.path.join(self.testDir, 'logs')
    for job in completeJobs:
        self.assertFalse(os.path.exists(job['fwjr_path']))
        jobFolder = 'JobCluster_%i' \
            % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
        jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' % (job['id']))
        self.assertTrue(os.path.isfile(jobPath))
        self.assertTrue(os.path.getsize(jobPath) > 0)

    ###########################################################################
    # Now the TaskArchiver

    config.Agent.componentName = 'TaskArchiver'
    testTaskArchiver = TaskArchiverPoller(config = config)

    testTaskArchiver.algorithm()

    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), 0)

    # The jobs themselves should no longer exist in WMBS
    for jdict in completeJobs:
        job = Job(id = jdict['id'])
        self.assertFalse(job.exists())

    # Keep a copy of the working directory around for post-mortem inspection
    if os.path.isdir('testDir'):
        shutil.rmtree('testDir')
    shutil.copytree('%s' % self.testDir, os.path.join(os.getcwd(), 'testDir'))

    return
def testD_PrototypeChain(self):
    """
    _PrototypeChain_

    Prototype the BossAir workflow: submit jobs through the
    JobSubmitterPoller, walk them through the BossAir states with the
    StatusPoller (New -> Idle -> Timeout), and verify the JobTrackerPoller
    fails them once they have timed out of the condor queue.
    """
    # NOTE(review): a second testD_PrototypeChain defined later in this file
    # shadows this one at class-definition time — confirm which copy should
    # survive.
    # Unused locals carry the 'dummy' prefix, matching the convention used
    # by the newer copy of this test below.
    dummymyThread = threading.currentThread()

    # This test needs an empty condor queue to make its counts meaningful
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'CondorPlugin'

    baAPI = BossAirAPI(config = config)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10
    dummycacheDir = os.path.join(self.testDir, 'CacheDir')

    jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                        task = workload.getTask("ReReco"),
                                        workloadSpec = os.path.join(self.testDir,
                                                                    'workloadTest',
                                                                    workloadName),
                                        site = 'se.T2_US_UCSD')

    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config = config)
    jobTracker = JobTrackerPoller(config = config)
    statusPoller = StatusPoller(config = config)

    jobSubmitter.algorithm()

    # After submission every job should be in the queue and 'New' in BossAir
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Check WMBS
    getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    statusPoller.algorithm()

    # The status poller moves the jobs from 'New' to 'Idle'; they are
    # still sitting in the condor queue
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status = 'New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status = 'Idle')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Tracker should do nothing
    jobTracker.algorithm()

    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    # Wait for jobs to timeout due to short Pending wait period
    time.sleep(12)

    statusPoller.algorithm()

    # All Idle jobs should now be flagged 'Timeout' and removed from condor
    newJobs = baAPI._loadByStatus(status = 'Idle')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status = 'Timeout', complete = '0')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Jobs should be gone
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # Check if they're complete
    completeJobs = baAPI.getComplete()
    self.assertEqual(len(completeJobs), nSubs * nJobs)

    # Because they timed out, they all should have failed
    jobTracker.algorithm()

    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'JobFailed', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    return
def testD_PrototypeChain(self):
    """
    _PrototypeChain_

    Prototype the BossAir workflow: submit jobs via the JobSubmitterPoller,
    drive them through the BossAir states with the StatusPoller
    (New -> Idle -> Timeout), and check that the JobTrackerPoller marks
    them JobFailed once they have timed out of the condor queue.

    NOTE(review): this redefines the testD_PrototypeChain declared earlier
    in the file and therefore shadows it — confirm the earlier copy is
    intentionally dead.
    """
    # Unused return kept (dummy prefix); currentThread() itself has no
    # visible effect here — presumably retained from the older copy.
    dummymyThread = threading.currentThread()

    # This test needs an empty condor queue so the job counts below hold
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(
        nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'SimpleCondorPlugin'

    baAPI = BossAirAPI(config=config, insertStates=True)

    workload = self.createTestWorkload()

    workloadName = "basicWorkload"

    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10
    # Unused; kept with the dummy prefix per file convention
    dummycacheDir = os.path.join(self.testDir, 'CacheDir')

    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(
                                            self.testDir, 'workloadTest', workloadName),
                                        site='se.T2_US_UCSD')

    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobTracker = JobTrackerPoller(config=config)
    statusPoller = StatusPoller(config=config)

    jobSubmitter.algorithm()

    # After submission: every job in the condor queue and 'New' in BossAir
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Check WMBS
    getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    statusPoller.algorithm()

    # Status poller moved the jobs New -> Idle; they still occupy the queue
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Tracker should do nothing
    jobTracker.algorithm()

    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    # Wait for jobs to timeout due to short Pending wait period
    time.sleep(12)

    statusPoller.algorithm()

    # After the timeout window: Idle -> Timeout and removed from condor
    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Timeout', complete='0')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Jobs should be gone
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # Check if they're complete
    completeJobs = baAPI.getComplete()
    self.assertEqual(len(completeJobs), nSubs * nJobs)

    # Because they timed out, they all should have failed
    jobTracker.algorithm()

    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), 0)

    result = getJobsAction.execute(state='JobFailed', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    return
def testA_CondorTest(self):
    """
    _CondorTest_

    Because I don't want this test to be submitter dependent:
    Create a dummy condor job.
    Submit a dummy condor job.
    Track it.
    Kill it.
    Exit
    """
    # This has to be run with an empty queue
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    nJobs = 10
    # Documents the CE these dummy jobs would target; not otherwise used
    jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

    # Create directories
    cacheDir = os.path.join(self.testDir, 'CacheDir')
    submitDir = os.path.join(self.testDir, 'SubmitDir')

    if not os.path.isdir(cacheDir):
        os.makedirs(cacheDir)
    if not os.path.isdir(submitDir):
        os.makedirs(submitDir)

    # Get config
    config = self.getConfig()

    # Get jobGroup
    testJobGroup = self.createTestJobs(nJobs = nJobs, cacheDir = cacheDir)

    # Propogate jobs
    changer = ChangeState(config)
    changer.propagate(testJobGroup.jobs, 'created', 'new')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')

    result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nJobs)

    jobTracker = JobTrackerPoller(config)
    jobTracker.setup()

    # First iteration
    # There are no jobs in the tracker,
    # The tracker should register the jobs as missing
    # This should tell it that they've finished
    # So the tracker should send them onwards
    jobTracker.algorithm()

    result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nJobs)

    result = self.getJobs.execute(state = 'complete', jobType = "Processing")
    self.assertEqual(len(result), 0)

    # Second iteration
    # Reset the jobs
    # This time submit them to the queue
    # The jobs should remain in holding
    changer.propagate(testJobGroup.jobs, 'executing', 'created')

    result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nJobs)

    # Create a submit script
    createSubmitScript(submitDir)

    # Stand in empty placeholder files for the package and sandbox;
    # context managers guarantee the handles are closed.
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as f:
        f.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as f:
        f.write(' ')

    for job in testJobGroup.jobs:
        job['plugin'] = 'CondorPlugin'
        job['userdn'] = 'jchurchill'
        job['custom'] = {'location': 'malpaquet'}
        job['cache_dir'] = self.testDir
        job['sandbox'] = sandbox
        job['packageDir'] = self.testDir

    info = {}
    info['packageDir'] = self.testDir
    info['index'] = 0
    info['sandbox'] = sandbox

    jobTracker.bossAir.submit(jobs = testJobGroup.jobs, info = info)

    time.sleep(1)

    # All jobs should be running
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    # Run the algorithm.  After this
    # all jobs should still be running
    jobTracker.algorithm()

    # Are jobs in the right state?
    result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nJobs)

    result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), 0)

    # Are jobs still in the condor_q
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    # Then we're done
    jobTracker.bossAir.kill(jobs = testJobGroup.jobs)

    # No jobs should be left
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    jobTracker.algorithm()

    # Are jobs in the right state?
    result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), 0)

    result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), nJobs)

    # This is optional if you want to look at what
    # files were actually created during running
    #if os.path.isdir('testDir'):
    #    shutil.rmtree('testDir')
    #shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

    return
def testA_CondorTest(self):
    """
    _CondorTest_

    Because I don't want this test to be submitter dependent:
    Create a dummy condor job.
    Submit a dummy condor job.
    Track it.
    Kill it.
    Exit

    NOTE(review): this redefines the testA_CondorTest declared earlier in
    the file and therefore shadows it — confirm the earlier copy is
    intentionally dead.
    """
    # This has to be run with an empty queue
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(
        nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

    nJobs = 10
    # Documents the CE these dummy jobs would target; not otherwise used
    jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

    # Create directories
    cacheDir = os.path.join(self.testDir, 'CacheDir')
    submitDir = os.path.join(self.testDir, 'SubmitDir')

    if not os.path.isdir(cacheDir):
        os.makedirs(cacheDir)
    if not os.path.isdir(submitDir):
        os.makedirs(submitDir)

    # Get config
    config = self.getConfig()

    # Get jobGroup
    testJobGroup = self.createTestJobs(nJobs=nJobs, cacheDir=cacheDir)

    # Propogate jobs
    changer = ChangeState(config)
    changer.propagate(testJobGroup.jobs, 'created', 'new')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')

    result = self.getJobs.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nJobs)

    jobTracker = JobTrackerPoller(config)
    jobTracker.setup()

    # First iteration
    # There are no jobs in the tracker,
    # The tracker should register the jobs as missing
    # This should tell it that they've finished
    # So the tracker should send them onwards
    jobTracker.algorithm()

    result = self.getJobs.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nJobs)

    result = self.getJobs.execute(state='complete', jobType="Processing")
    self.assertEqual(len(result), 0)

    # Second iteration
    # Reset the jobs
    # This time submit them to the queue
    # The jobs should remain in holding
    changer.propagate(testJobGroup.jobs, 'executing', 'created')

    result = self.getJobs.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nJobs)

    # Create a submit script
    createSubmitScript(submitDir)

    # Empty placeholder files stand in for the job package and sandbox
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    f = open(jobPackage, 'w')
    f.write(' ')
    f.close()

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    f = open(sandbox, 'w')
    f.write(' ')
    f.close()

    # Attach the submission metadata BossAir needs to each job
    for job in testJobGroup.jobs:
        job['plugin'] = 'CondorPlugin'
        job['userdn'] = 'jchurchill'
        job['custom'] = {'location': 'malpaquet'}
        job['cache_dir'] = self.testDir
        job['sandbox'] = sandbox
        job['packageDir'] = self.testDir

    info = {}
    info['packageDir'] = self.testDir
    info['index'] = 0
    info['sandbox'] = sandbox

    # Submit straight through BossAir, bypassing the JobSubmitter component
    jobTracker.bossAir.submit(jobs=testJobGroup.jobs, info=info)

    time.sleep(1)

    # All jobs should be running
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    # Run the algorithm.  After this
    # all jobs should still be running
    jobTracker.algorithm()

    # Are jobs in the right state?
    result = self.getJobs.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nJobs)

    result = self.getJobs.execute(state='Complete', jobType="Processing")
    self.assertEqual(len(result), 0)

    # Are jobs still in the condor_q
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    # Then we're done
    jobTracker.bossAir.kill(jobs=testJobGroup.jobs)

    # No jobs should be left
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # With the queue empty the tracker should move everything to Complete
    jobTracker.algorithm()

    # Are jobs in the right state?
    result = self.getJobs.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), 0)

    result = self.getJobs.execute(state='Complete', jobType="Processing")
    self.assertEqual(len(result), nJobs)

    # This is optional if you want to look at what
    # files were actually created during running
    # if os.path.isdir('testDir'):
    #    shutil.rmtree('testDir')
    # shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

    return