def testA_StraightThrough(self):
    """
    _StraightThrough_

    Just run everything straight through without any variations
    """
    # Do pre-submit job check
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    myThread = threading.currentThread()
    workload = self.createTestWorkload()
    config   = self.getConfig()

    name         = 'WMAgent_Test1'
    site         = self.sites[0]
    nSubs        = 5
    nFiles       = 10
    workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload',
                                'WMSandbox', 'WMWorkload.pkl')

    # Create a collection of files
    self.createFileCollection(name = name, nSubs = nSubs, nFiles = nFiles,
                              workflowURL = workloadPath, site = site)

    ############################################################
    # Test the JobCreator

    config.Agent.componentName = 'JobCreator'
    testJobCreator = JobCreatorPoller(config = config)

    testJobCreator.algorithm()
    time.sleep(5)

    # Did all jobs get created?
    getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # Count database objects
    result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
    self.assertEqual(len(result), nSubs * nFiles)

    # Find the test directory
    testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
    self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
    self.assertTrue(len(os.listdir(testDirectory)) <= 20)

    groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

    # First job should be in here
    self.assertTrue('job_1' in os.listdir(groupDirectory))
    jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
    self.assertTrue(os.path.isfile(jobFile))
    f = open(jobFile, 'rb')  # pickles must be read in binary mode
    job = cPickle.load(f)
    f.close()
    self.assertEqual(job['workflow'], name)
    self.assertEqual(len(job['input_files']), 1)
    self.assertEqual(os.path.basename(job['sandbox']),
                     'TestWorkload-Sandbox.tar.bz2')

    ###############################################################
    # Now test the JobSubmitter

    config.Agent.componentName = 'JobSubmitter'
    testJobSubmitter = JobSubmitterPoller(config = config)
    testJobSubmitter.algorithm()

    # Check that jobs are in the right state
    result = getJobsAction.execute(state = 'Created', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    # Check assigned locations
    getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
    for jobID in result:
        loc = getLocationAction.execute(jobid = jobID)
        self.assertEqual(loc, [[site]])

    # Check to make sure we have running jobs
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, nFiles * nSubs)

    #################################################################
    # Now the JobTracker

    config.Agent.componentName = 'JobTracker'
    testJobTracker = JobTrackerPoller(config = config)
    testJobTracker.setup()
    testJobTracker.algorithm()

    # Running the algo without removing the jobs should do nothing
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    condorRM()
    time.sleep(1)

    # All jobs gone?
    nRunning = getCondorRunningJobs()
    self.assertEqual(nRunning, 0)

    testJobTracker.algorithm()
    time.sleep(5)

    # Now that the jobs are gone from condor, the tracker should mark them complete
    result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #################################################################
    # Now the JobAccountant

    # First you need to load all jobs
    self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
    completeJobs       = self.getFWJRAction.execute(state = "complete")

    # Create reports for all jobs
    self.createReports(jobs = completeJobs, retryCount = 0)

    config.Agent.componentName = 'JobAccountant'
    testJobAccountant = JobAccountantPoller(config = config)
    testJobAccountant.setup()

    # It should do something with the jobs
    testJobAccountant.algorithm()

    # All the jobs should be done now
    result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    #######################################################################
    # Now the JobArchiver

    config.Agent.componentName = 'JobArchiver'
    testJobArchiver = JobArchiverPoller(config = config)
    testJobArchiver.algorithm()

    # All the jobs should be cleaned up
    result = getJobsAction.execute(state = 'Success', jobType = "Processing")
    self.assertEqual(len(result), 0)
    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), nSubs * nFiles)

    logDir = os.path.join(self.testDir, 'logs')
    for job in completeJobs:
        self.assertFalse(os.path.exists(job['fwjr_path']))
        jobFolder = 'JobCluster_%i' \
                    % (int(job['id'] / config.JobArchiver.numberOfJobsToCluster))
        jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' % (job['id']))
        self.assertTrue(os.path.isfile(jobPath))
        self.assertTrue(os.path.getsize(jobPath) > 0)

    ###########################################################################
    # Now the TaskArchiver

    config.Agent.componentName = 'TaskArchiver'
    testTaskArchiver = TaskArchiverPoller(config = config)
    testTaskArchiver.algorithm()

    result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
    self.assertEqual(len(result), 0)

    for jdict in completeJobs:
        job = Job(id = jdict['id'])
        self.assertFalse(job.exists())

    if os.path.isdir('testDir'):
        shutil.rmtree('testDir')
    shutil.copytree(self.testDir, os.path.join(os.getcwd(), 'testDir'))

    return
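# For context, a minimal sketch of what a condor-counting helper like the
# getCondorRunningJobs() used above might look like. This is an illustrative
# assumption, not the real helper: it shells out to condor_q for the current
# user and counts the returned cluster ids, assuming condor_q is on the PATH.

import getpass
import subprocess

def getCondorRunningJobsSketch():
    """Count this user's jobs in the condor queue (hypothetical sketch)."""
    user = getpass.getuser()
    output = subprocess.Popen(['condor_q', user, '-format', '%d\n', 'ClusterId'],
                              stdout = subprocess.PIPE).communicate()[0]
    return len([line for line in output.splitlines() if line.strip()])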
def testReportHandling(self):
    """
    _testReportHandling_

    Verify that we're able to parse a CMSSW report, convert it to a Report()
    style report, pickle it and then have the accountant process it.
    """
    self.procPath = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

    myReport = Report("cmsRun1")
    myReport.parse(self.procPath)

    # Fake some metadata that should be added by the stageout scripts.
    for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"):
        fileRef.size = 1024
        fileRef.location = "cmssrm.fnal.gov"

    fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
    cmsRunStep = myReport.retrieveStep("cmsRun1")
    cmsRunStep.status = 0
    myReport.setTaskName('/TestWF/None')
    myReport.persist(fwjrPath)

    self.setFWJRAction.execute(jobID = self.testJob["id"], fwjrPath = fwjrPath)

    pFile = DBSBufferFile(lfn = "/path/to/some/lfn", size = 600000, events = 60000)
    pFile.setAlgorithm(appName = "cmsRun", appVer = "UNKNOWN", appFam = "RECO",
                       psetHash = "GIBBERISH", configContent = "MOREGIBBERISH")
    pFile.setDatasetPath("/bogus/dataset/path")
    #pFile.addRun(Run(1, *[45]))
    pFile.create()

    config = self.createConfig(workerThreads = 1)
    accountant = JobAccountantPoller(config)
    accountant.setup()
    accountant.algorithm()

    self.verifyJobSuccess(self.testJob["id"])
    self.verifyFileMetaData(self.testJob["id"],
                            myReport.getAllFilesFromStep("cmsRun1"))

    inputFile = File(lfn = "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root")
    inputFile.load()
    self.testMergeJob = Job(name = "testMergeJob", files = [inputFile])
    self.testMergeJob.create(group = self.mergeJobGroup)
    self.testMergeJob["state"] = "complete"
    self.stateChangeAction.execute(jobs = [self.testMergeJob])

    self.mergePath = os.path.join(WMCore.WMBase.getTestBase(),
                                  "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

    myReport = Report("mergeReco")
    myReport.parse(self.mergePath)

    # Fake some metadata that should be added by the stageout scripts.
    for fileRef in myReport.getAllFileRefsFromStep("mergeReco"):
        fileRef.size = 1024
        fileRef.location = "cmssrm.fnal.gov"
        fileRef.dataset = {"applicationName": "cmsRun",
                           "applicationVersion": "CMSSW_3_4_2_patch1",
                           "primaryDataset": "MinimumBias",
                           "processedDataset": "Rereco-v1",
                           "dataTier": "RECO"}

    fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
    myReport.setTaskName('/MergeWF/None')
    cmsRunStep = myReport.retrieveStep("mergeReco")
    cmsRunStep.status = 0
    myReport.persist(fwjrPath)

    self.setFWJRAction.execute(jobID = self.testMergeJob["id"], fwjrPath = fwjrPath)

    accountant.algorithm()
    self.verifyJobSuccess(self.testMergeJob["id"])
    self.verifyFileMetaData(self.testMergeJob["id"],
                            myReport.getAllFilesFromStep("mergeReco"))
    return
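# A minimal sketch of the parse -> persist -> reload round trip the test
# above exercises, shown in isolation. The paths are hypothetical; the
# Report calls (parse, persist, unpersist) are assumed to mirror the
# WMCore.FwkJobReport.Report API used in the test.

from WMCore.FwkJobReport.Report import Report

def roundTripReportSketch(xmlPath, pklPath):
    """Parse a CMSSW XML job report, pickle it, and load it back."""
    report = Report("cmsRun1")
    report.parse(xmlPath)      # convert the XML FWJR into a Report()
    report.persist(pklPath)    # pickle it, as the runtime stageout code does
    reloaded = Report("cmsRun1")
    reloaded.unpersist(pklPath)
    return reloaded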
def preInitialization(self):
    # Register the accountant poller with the component's worker thread
    # manager so that it runs once every pollInterval seconds.
    pollInterval = self.config.JobAccountant.pollInterval
    myThread = threading.currentThread()
    myThread.workerThreadManager.addWorker(JobAccountantPoller(self.config),
                                           pollInterval)
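# A hedged sketch of the only configuration this hook reads: a JobAccountant
# section carrying pollInterval. The attribute names follow the access
# pattern in preInitialization above; the value is an arbitrary example.

from WMCore.Configuration import Configuration

config = Configuration()
config.component_('JobAccountant')
config.JobAccountant.pollInterval = 60  # seconds between accountant polling cycles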
def roundtripHelper(self, wantsASO = True, wantsASOWin = True, preserveLFN = False):
    wmstatDB = self.couchServer.connectDatabase('asynctrackerpoller_t_wmstats')

    # First, add some documents telling ASO we have files ready to MOVE
    wmstatDB.queue( self.getWorkloadDoc( preserveLFN = preserveLFN ) )
    wmstatDB.queue( self.getJobDoc() )
    wmstatDB.commit()

    # Make some test jobs
    testJob = self.createTestJob()
    self.setJobWantsASO(os.path.join(self.testDir, "oneaso.pkl"),
                        preserveLFN = preserveLFN)
    self.assertEqual( testJob.getState().lower(), 'asopending')

    # Now, make our AsyncStageoutTrackerPoller and make sure that things
    # are still pending before and after a cycle with no ASO activity
    asoPoller = AsyncStageoutTrackerPoller( self.getWMAgentConfig() )
    self.assertEqual( testJob.getState().lower(), 'asopending')
    asoPoller.algorithm()
    self.assertEqual( testJob.getState().lower(), 'asopending')

    # Okay, we have the right stuff in WMStats for ASO to pick it up.
    # Let's run the algorithm...
    config = self.getASOConfig()

    # Pull data from WMStats into ASO's database
    print "Pushing lfnDaemon"
    lfnDaemon = AsyncStageOut.LFNSourceDuplicator.LFNSourceDuplicator(config)
    lfnDaemon.algorithm()

    # Tell ASO to "transfer" a file
    print "Done.. pushing transferDaemon"
    # Trigger ASO's transfer logic (which we've noop'd)
    transferDaemon = AsyncStageOut.TransferDaemon.TransferDaemon(config)
    transferDaemon.get_tfc_rules = self._mock_get_tfc_rules
    print "Calling transferDaemon.algorithm()"
    transferDaemon.algorithm()
    transferDaemon.algorithm()
    transferDaemon.algorithm()
    print "Done...calling monitorDaemon"

    # ASO has allegedly transferred things; check the status of monitoring
    monitorDaemon = AsyncStageOut.AnalyticsDaemon.AnalyticsDaemon(config)
    monitorDaemon.algorithm()
    monitorDaemon.algorithm()

    # Now we have a user_monitoring document we can look at.
    # See if the poller finds it.
    print "Done...calling ASOTrackerPoller"
    # Needs to be called twice since end_key isn't inclusive
    asoPoller.algorithm()
    asoPoller.algorithm()

    if wantsASOWin:
        self.assertEqual( testJob.getState().lower(), 'complete')
    else:
        self.assertEqual( testJob.getState().lower(), 'asofailed')

    # Now that the file has been transferred, make sure that the accountant
    # sends it to jobsuccess
    jobAccountant = JobAccountantPoller(self.getWMAgentConfig())
    jobAccountant.setup()
    jobAccountant.algorithm()

    if wantsASOWin:
        self.assertEqual( testJob.getState().lower(), 'success')
    else:
        self.assertEqual( testJob.getState().lower(), 'asofailed')

    return testJob
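# For reference, a hedged sketch of the _mock_get_tfc_rules stub patched onto
# the TransferDaemon above. The real get_tfc_rules looks up a site's trivial
# file catalog (TFC); the test only needs something whose matchLFN() resolves
# without network access. The identity mapping and the matchLFN signature
# here are assumptions for illustration, not the production interface.

class _IdentityTFC(object):
    """Pretend TFC that maps every LFN straight to itself."""
    def matchLFN(self, protocol, lfn):
        return lfn  # treat the LFN as its own PFN

def _mock_get_tfc_rules(self, site):
    # Ignore the site and hand back the no-op catalog.
    return _IdentityTFC()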