def testJobArchiverPollerAlertsSending_cleanJobCache(self):
    """
    Cause exception (alert-worthy situation) in the cleanJobCache method
    and check that a single alert, whose "Source" is the poller's class
    name, shows up in the alerts receiver's queue.

    """
    currentThread = threading.currentThread()
    config = self.getConfig()
    address = config.Alert.address
    controlAddr = config.Alert.controlAddr
    handler, self.alertsReceiver = utils.setUpReceiver(address, controlAddr)
    testJobArchiver = JobArchiverPoller(config=config)

    # a job whose cache_dir is None is handed over to provoke the problem
    # (and thus the Alert message)
    testJobArchiver.cleanJobCache(dict(cache_dir=None))

    # block until the receiver has queued the generated alert
    while len(handler.queue) < 1:
        time.sleep(0.3)
        print("%s waiting for alert to arrive ..." % inspect.stack()[0][3])

    self.alertsReceiver.shutdown()
    self.alertsReceiver = None

    # exactly one alert is expected, attributed to the poller class
    self.assertEqual(len(handler.queue), 1)
    received = handler.queue[0]
    self.assertEqual(received["Source"], testJobArchiver.__class__.__name__)
def testJobArchiverPollerAlertsSending_cleanJobCache(self):
    """
    Cause exception (alert-worthy situation) in the cleanJobCache method.

    The test sets up an alerts receiver, calls cleanJobCache with a job
    whose cache_dir is None, waits for the receiver's queue to become
    non-empty and then verifies the queued alert.

    """
    currentThread = threading.currentThread()
    config = self.getConfig()
    receiverAddress = config.Alert.address
    receiverControl = config.Alert.controlAddr
    handler, self.alertsReceiver = utils.setUpReceiver(receiverAddress,
                                                       receiverControl)
    testJobArchiver = JobArchiverPoller(config=config)

    # invoke the problem and thus Alert message
    brokenJob = dict(cache_dir=None)
    testJobArchiver.cleanJobCache(brokenJob)

    # poll the receiver until the alert has arrived
    while len(handler.queue) < 1:
        time.sleep(0.3)
        print("%s waiting for alert to arrive ..." % inspect.stack()[0][3])

    self.alertsReceiver.shutdown()
    self.alertsReceiver = None

    # now check if the alert was properly sent
    self.assertEqual(len(handler.queue), 1)
    firstAlert = handler.queue[0]
    self.assertEqual(firstAlert["Source"], testJobArchiver.__class__.__name__)
def cleanJobs(jobList, config):
    """
    Clean out all directories that were created by the job using the
    jobArchiver.

    The work area of the jobs is cleaned through
    JobArchiverPoller.cleanWorkArea(); afterwards the job cache
    (config.JobCreator.jobCacheDir, laid out as workflow/task/...) is walked
    and every entry whose name contains 'JobCollection' is deleted. Task and
    workflow directories that end up empty are removed as well.

    As an input, the list should be a list of job objects, for which
    we need the names and IDs.

    Returns the jobList that was passed in.

    Raises Exception if a JobCollection directory cannot be deleted.
    """
    jobArchiver = JobArchiverPoller(config)
    jobArchiver.cleanWorkArea(jobList)

    cacheDir = config.JobCreator.jobCacheDir

    # Once you've nuked the job directories, nuke the whole workflow
    for workflowDir in os.listdir(cacheDir):
        workflowPath = os.path.join(cacheDir, workflowDir)
        for tDir in os.listdir(workflowPath):
            taskDir = os.path.join(workflowPath, tDir)
            for jColl in os.listdir(taskDir):
                if 'JobCollection' in jColl:
                    try:
                        shutil.rmtree(os.path.join(taskDir, jColl))
                    except Exception as ex:
                        # 'except ex:' used to reference an undefined name
                        # here, which hid the real error behind a NameError
                        msg = 'Error attepting to delete jobCollection: %s' % (ex)
                        raise Exception(msg)
            if os.listdir(taskDir) == []:
                # If the taskDir is empty, remove it
                shutil.rmtree(taskDir)
        if os.listdir(workflowPath) == []:
            # If the workflowDir is empty, remove it
            shutil.rmtree(workflowPath)

    return jobList
def cleanJobs(jobList, config):
    """
    Clean out all directories that were created by the job using the
    jobArchiver.

    First the jobs' work area is cleaned via
    JobArchiverPoller.cleanWorkArea(). Then the job cache directory
    (config.JobCreator.jobCacheDir) is traversed workflow by workflow and
    task by task: entries with 'JobCollection' in their name are deleted,
    and any task or workflow directory left empty is deleted too.

    As an input, the list should be a list of job objects, for which
    we need the names and IDs.

    Returns the same jobList it was given.

    Raises Exception if deleting a JobCollection directory fails.
    """
    jobArchiver = JobArchiverPoller(config)
    jobArchiver.cleanWorkArea(jobList)

    jobCacheDir = config.JobCreator.jobCacheDir

    # Once you've nuked the job directories, nuke the whole workflow
    for workflowDir in os.listdir(jobCacheDir):
        workflowPath = os.path.join(jobCacheDir, workflowDir)
        for tDir in os.listdir(workflowPath):
            taskDir = os.path.join(workflowPath, tDir)
            for jColl in os.listdir(taskDir):
                if 'JobCollection' not in jColl:
                    continue
                try:
                    shutil.rmtree(os.path.join(taskDir, jColl))
                except Exception as ex:
                    # the former 'except ex:' evaluated an undefined name and
                    # therefore raised NameError instead of this message
                    msg = 'Error attepting to delete jobCollection: %s' % (ex)
                    raise Exception(msg)
            if os.listdir(taskDir) == []:
                # If the taskDir is empty, remove it
                shutil.rmtree(taskDir)
        if os.listdir(workflowPath) == []:
            # If the workflowDir is empty, remove it
            shutil.rmtree(workflowPath)

    return jobList
def preInitialization(self):
    """
    Handles the setup of the worker thread.

    A JobArchiverPoller built from self.config is added to the current
    thread's workerThreadManager, using JobArchiver.pollInterval from the
    configuration as its polling interval.
    """
    logging.info("JobArchiver.preInitialization")

    interval = self.config.JobArchiver.pollInterval
    logging.info("Setting poll interval to %s seconds" % interval)

    # Add event loop to worker manager
    manager = threading.currentThread().workerThreadManager
    manager.addWorker(JobArchiverPoller(self.config), interval)
    return
def testB_SpeedTest(self): """ _SpeedTest_ Tests the components, as in sees if they load. Otherwise does nothing. """ return myThread = threading.currentThread() config = self.getConfig() self.nJobs = 2000 testJobGroup = self.createTestJobGroup() changer = ChangeState(config) cacheDir = os.path.join(self.testDir, 'test') for job in testJobGroup.jobs: job["outcome"] = "success" job.save() path = os.path.join(cacheDir, job['name']) os.makedirs(path) f = open('%s/%s.out' %(path, job['name']),'w') f.write(job['name']) f.close() job.setCache(path) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'success', 'complete') testJobArchiver = JobArchiverPoller(config = config) cProfile.runctx("testJobArchiver.algorithm()", globals(), locals(), filename = "testStats.stat") p = pstats.Stats('testStats.stat') p.sort_stats('cumulative') p.print_stats(.2) return
def testA_StraightThrough(self):
    """
    _StraightThrough_

    Just run everything straight through without any variations

    The pollers are run one after another on the same set of jobs
    (JobCreator, JobSubmitter, JobTracker, JobAccountant, JobArchiver,
    TaskArchiver); after each step the job states and on-disk artifacts
    are checked. The test refuses to start if getCondorRunningJobs()
    reports any running jobs.
    """
    # Do pre-submit job check
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    myThread = threading.currentThread()
    workload = self.createTestWorkload()
    config = self.getConfig()

    name = 'WMAgent_Test1'
    site = self.sites[0]
    nSubs = 5
    nFiles = 10
    workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload',
                                'WMSandbox', 'WMWorkload.pkl')

    # Create a collection of files
    self.createFileCollection(name = name, nSubs = nSubs, nFiles = nFiles,
                              workflowURL = workloadPath, site = site)

    ############################################################
    # Test the JobCreator

    config.Agent.componentName = 'JobCreator'
    testJobCreator = JobCreatorPoller(config = config)

    testJobCreator.algorithm()
    time.sleep(5)

    # Did all jobs get created?
    getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), nSubs*nFiles)

    # Count database objects
    result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
    self.assertEqual(len(result), nSubs * nFiles)

    # Find the test directory
    testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
    self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
    self.assertTrue(len(os.listdir(testDirectory)) <= 20)

    groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

    # First job should be in here
    self.assertTrue('job_1' in os.listdir(groupDirectory))
    jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
    self.assertTrue(os.path.isfile(jobFile))
    # Load the pickled job back and check a few of its fields
    f = open(jobFile, 'r')
    job = cPickle.load(f)
    f.close()

    self.assertEqual(job['workflow'], name)
    self.assertEqual(len(job['input_files']), 1)
    self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')

    ###############################################################
    # Now test the JobSubmitter

    config.Agent.componentName = 'JobSubmitter'
    testJobSubmitter = JobSubmitterPoller(config = config)

    testJobSubmitter.algorithm()

    # Check that jobs are in the right state
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # Check assigned locations
    getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
    for id in result:
        loc = getLocationAction.execute(jobid = id)
        self.assertEqual(loc, [[site]])

    # Check to make sure we have running jobs
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, nFiles * nSubs)

    #################################################################
    # Now the JobTracker

    config.Agent.componentName = 'JobTracker'
    testJobTracker = JobTrackerPoller(config = config)
    testJobTracker.setup()

    testJobTracker.algorithm()

    # Running the algo without removing the jobs should do nothing
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    condorRM()
    time.sleep(1)

    # All jobs gone?
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0)

    testJobTracker.algorithm()
    time.sleep(5)

    # With the jobs removed, this tracker pass is expected to have moved
    # every job from Executing to Complete
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #################################################################
    # Now the JobAccountant

    # First you need to load all jobs
    self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
    completeJobs = self.getFWJRAction.execute(state = "complete")

    # Create reports for all jobs
    self.createReports(jobs = completeJobs, retryCount = 0)

    config.Agent.componentName = 'JobAccountant'
    testJobAccountant = JobAccountantPoller(config = config)
    testJobAccountant.setup()

    # It should do something with the jobs
    testJobAccountant.algorithm()

    # All the jobs should be done now
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #######################################################################
    # Now the JobArchiver

    config.Agent.componentName = 'JobArchiver'
    testJobArchiver = JobArchiverPoller(config = config)

    testJobArchiver.algorithm()

    # All the jobs should be cleaned up
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    logDir = os.path.join(self.testDir, 'logs')

    # For every job: its fwjr_path must no longer exist, and a non-empty
    # Job_<id>.tar must be present in the job's JobCluster_<n> folder
    for job in completeJobs:
        self.assertFalse(os.path.exists(job['fwjr_path']))
        jobFolder = 'JobCluster_%i' \
                    % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
        jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' %(job['id']))
        self.assertTrue(os.path.isfile(jobPath))
        self.assertTrue(os.path.getsize(jobPath) > 0)

    ###########################################################################
    # Now the TaskArchiver

    config.Agent.componentName = 'TaskArchiver'
    testTaskArchiver = TaskArchiverPoller(config = config)

    testTaskArchiver.algorithm()

    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), 0)

    # None of the jobs may exist any longer
    for jdict in completeJobs:
        job = Job(id = jdict['id'])
        self.assertFalse(job.exists())

    # Copy the test directory to ./testDir in the current working directory,
    # replacing any copy left there before
    # NOTE(review): presumably kept for post-mortem inspection -- confirm
    if os.path.isdir('testDir'):
        shutil.rmtree('testDir')

    shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

    return
def testBasicFunctionTest(self):
    """
    _BasicFunctionTest_

    Tests the components, by seeing if they can process a simple set
    of closeouts

    Jobs with a one-file cache directory are pushed to the 'success' state
    and the JobArchiverPoller is run once. Afterwards the test checks that
    every job is in state 'cleanout', that the cache directories are gone
    and that each job has a Job_<id>.tar.bz2 tarball in the log directory
    which contains the job's .out file. The tarballs are unpacked into the
    current working directory and the unpacked copies are removed again.
    """
    myThread = threading.currentThread()

    config = self.getConfig()

    testJobGroup = self.createTestJobGroup()

    changer = ChangeState(config)

    cacheDir = os.path.join(self.testDir, 'test')

    if not os.path.isdir(cacheDir):
        os.mkdir(cacheDir)

    for job in testJobGroup.jobs:
        myThread.transaction.begin()
        job["outcome"] = "success"
        job.save()
        myThread.transaction.commit()
        path = os.path.join(cacheDir, job['name'])
        os.makedirs(path)
        with open('%s/%s.out' % (path, job['name']), 'w') as f:
            f.write(job['name'])
        job.setCache(path)

    changer.propagate(testJobGroup.jobs, 'created', 'new')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')
    changer.propagate(testJobGroup.jobs, 'complete', 'executing')
    changer.propagate(testJobGroup.jobs, 'success', 'complete')

    testJobArchiver = JobArchiverPoller(config=config)
    testJobArchiver.algorithm()

    result = myThread.dbi.processData(
        "SELECT wmbs_job_state.name FROM wmbs_job_state INNER JOIN wmbs_job ON wmbs_job.state = wmbs_job_state.id")[0].fetchall()

    for val in result:
        self.assertEqual(val.values(), ['cleanout'])

    # The job cache directories must have been removed
    dirList = os.listdir(cacheDir)
    for job in testJobGroup.jobs:
        self.assertFalse(job["name"] in dirList)

    logPath = os.path.join(config.JobArchiver.componentDir, 'logDir', 'w', 'wf001', 'JobCluster_0')
    logList = os.listdir(logPath)
    for job in testJobGroup.jobs:
        tarName = 'Job_%i.tar.bz2' % (job['id'])
        self.assertTrue(tarName in logList,
                        'Could not find transferred tarball for job %i' % (job['id']))

        # Unpack into the current working directory. communicate() drains
        # stdout/stderr while waiting; wait() alone can deadlock once the
        # child fills one of the pipes.
        pipe = Popen(['tar', '-jxvf', os.path.join(logPath, tarName)],
                     stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        filename = 'Job_%i/%s.out' % (job['id'], job['name'])
        self.assertTrue(os.path.isfile(filename), 'Could not find file %s' % (filename))
        with open(filename, 'r') as f:
            fileContents = f.readlines()
        self.assertTrue(job['name'] in fileContents[0])

        # Remove what was unpacked into the working directory
        shutil.rmtree('Job_%i' % (job['id']))
        if os.path.isfile(tarName):
            os.remove(tarName)

    return
def testA_BasicFunctionTest(self):
    """
    _BasicFunctionTest_

    Tests the components, by seeing if they can process a simple set
    of closeouts

    Jobs with a one-file cache directory are pushed to the 'success' state
    and the JobArchiverPoller is run once. Afterwards the test checks that
    every job is in state 'cleanout', that the cache directories are gone
    and that each job has a Job_<id>.tar.bz2 tarball in the log directory
    which contains the job's .out file. The tarballs are unpacked into the
    current working directory and the unpacked copies are removed again.
    """
    myThread = threading.currentThread()

    config = self.getConfig()

    testJobGroup = self.createTestJobGroup()

    changer = ChangeState(config)

    cacheDir = os.path.join(self.testDir, 'test')

    if not os.path.isdir(cacheDir):
        os.mkdir(cacheDir)

    for job in testJobGroup.jobs:
        myThread.transaction.begin()
        job["outcome"] = "success"
        job.save()
        myThread.transaction.commit()
        path = os.path.join(cacheDir, job['name'])
        os.makedirs(path)
        with open('%s/%s.out' % (path, job['name']), 'w') as f:
            f.write(job['name'])
        job.setCache(path)

    changer.propagate(testJobGroup.jobs, 'created', 'new')
    changer.propagate(testJobGroup.jobs, 'executing', 'created')
    changer.propagate(testJobGroup.jobs, 'complete', 'executing')
    changer.propagate(testJobGroup.jobs, 'success', 'complete')

    testJobArchiver = JobArchiverPoller(config=config)
    testJobArchiver.algorithm()

    result = myThread.dbi.processData(
        "SELECT wmbs_job_state.name FROM wmbs_job_state INNER JOIN wmbs_job ON wmbs_job.state = wmbs_job_state.id"
    )[0].fetchall()

    for val in result:
        self.assertEqual(val.values(), ['cleanout'])

    # The job cache directories must have been removed
    dirList = os.listdir(cacheDir)
    for job in testJobGroup.jobs:
        self.assertFalse(job["name"] in dirList)

    logPath = os.path.join(config.JobArchiver.componentDir, 'logDir', 'w', 'wf001', 'JobCluster_0')
    logList = os.listdir(logPath)
    for job in testJobGroup.jobs:
        tarName = 'Job_%i.tar.bz2' % (job['id'])
        self.assertTrue(tarName in logList,
                        'Could not find transferred tarball for job %i' % (job['id']))

        # Unpack into the current working directory. communicate() is used
        # rather than wait() because both output streams are pipes: wait()
        # can block forever once the child has filled a pipe buffer.
        pipe = Popen(['tar', '-jxvf', os.path.join(logPath, tarName)],
                     stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        filename = 'Job_%i/%s.out' % (job['id'], job['name'])
        self.assertTrue(os.path.isfile(filename), 'Could not find file %s' % (filename))
        with open(filename, 'r') as f:
            fileContents = f.readlines()
        self.assertTrue(job['name'] in fileContents[0])

        # Remove what was unpacked into the working directory
        shutil.rmtree('Job_%i' % (job['id']))
        if os.path.isfile(tarName):
            os.remove(tarName)

    return