def testF_WMSMode(self):
    """
    _WMSMode_

    Try running things in WMS Mode.

    Submits 5 subscriptions x 10 jobs through the JobSubmitterPoller with
    CondorPlugin in WMS mode, then exercises updateSiteInformation() on the
    resulting idle jobs.
    """
    # Refuse to run against a schedd that already has jobs for this user:
    # the running-job counts asserted below would be skewed.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'CondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10

    # NOTE: dropped the unused local 'cacheDir' that was computed here.
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site=None)
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    # Every job in every subscription should have been submitted.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    # Test the Site Info has been updated. Make sure T2_US_UCSD is not in the
    # sitelist in BossAir_t.py.
    sn = "T2_US_UCSD"
    baAPI.updateSiteInformation(idleJobs, sn, True)

    del jobSubmitter
    return
def testT_updateJobInfo(self):
    """
    _updateJobInfo_

    Test the updateSiteInformation method from PyCondorPlugin.py
    """
    # Require a clean schedd for this user, otherwise the submitted-job
    # counts below cannot be asserted reliably.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'PyCondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)
    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 1
    nJobs = 2

    # NOTE: dropped the unused local 'cacheDir' that was computed here.
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site="se.T2_US_UCSD")
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    # Make one of the sites in the sitelist True for ABORTED/DRAINING/DOWN.
    # updateSiteInformation() should edit the classAd for all the jobs that
    # are bound for the site.
    # Check the queue manually using condor_q -l <job id>
    jtok = baAPI.updateSiteInformation(idleJobs, "T2_US_UCSD", True)
    # Identity comparison with None ('is not'), not '!='.
    if jtok is not None:
        # errorCode can be either 61301/61302/61303 (Aborted/Draining/Down)
        baAPI.kill(jtok, errorCode=61301)

    return
def testF_WMSMode(self):
    """
    _WMSMode_

    Try running things in WMS Mode.

    Submits 5 subscriptions x 10 jobs with PyCondorPlugin in WMS mode,
    tracks them, and kills everything left Idle.
    """
    # The schedd must be empty for this user before we start, or the
    # running-job assertions below are meaningless.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'PyCondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10

    # NOTE: dropped the unused local 'cacheDir' that was computed here.
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site=None)
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    # All jobs should have reached the schedd.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    # Clean up: remove everything we submitted.
    baAPI.kill(jobs=idleJobs)

    del jobSubmitter
    return
def testF_WMSMode(self):
    """
    _WMSMode_

    Try running things in WMS Mode.

    Same flow as the other WMS-mode test: submit 5 x 10 jobs through the
    JobSubmitterPoller with PyCondorPlugin, track, and kill the idle jobs.
    """
    # Precondition: no running jobs for this user, so counts are exact.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'PyCondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10

    # NOTE: dropped the unused local 'cacheDir' that was computed here.
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site=None)
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    baAPI.kill(jobs=idleJobs)

    del jobSubmitter
    return
def testB_PluginTest(self):
    """
    _PluginTest_

    Now check that these functions worked if called through plugins
    instead of directly.  Drives a full submit -> track -> removeComplete
    cycle through TestPlugin and checks the bl_runjob table along the way.
    """
    thread = threading.currentThread()

    config = self.getConfig()
    baAPI = BossAirAPI(config=config)

    # Create some jobs
    nJobs = 10
    dummyJobs = self.createDummyJobs(nJobs=nJobs, location='Xanadu')

    stateChanger = ChangeState(config)
    stateChanger.propagate(dummyJobs, 'created', 'new')
    stateChanger.propagate(dummyJobs, 'executing', 'created')

    # Prior to building the job, each job must have a plugin
    # and user assigned.
    for dummy in dummyJobs:
        dummy['plugin'] = 'TestPlugin'
        dummy['owner'] = 'tapas'

    baAPI.submit(jobs=dummyJobs)

    # Everything we just submitted should be sitting in 'New'.
    self.assertEqual(len(baAPI._loadByStatus(status='New')), nJobs)

    # All of them should also be registered as run jobs.
    self.assertEqual(len(baAPI._listRunJobs()), nJobs)

    # TestPlugin should complete all jobs during tracking.
    baAPI.track()

    # After tracking there should be no running jobs left.
    self.assertEqual(len(baAPI._listRunJobs()), 0)

    # ...and all of them should now be flagged complete.
    self.assertEqual(len(baAPI.getComplete()), nJobs)

    # Query the table directly because BossAir is specifically built
    # to keep itself from finding completed jobs.
    rows = thread.dbi.processData(
        "SELECT id FROM bl_runjob")[0].fetchall()
    self.assertEqual(len(rows), nJobs)

    baAPI.removeComplete(jobs=dummyJobs)

    rows = thread.dbi.processData(
        "SELECT id FROM bl_runjob")[0].fetchall()
    self.assertEqual(len(rows), 0)

    return
def testA_APITest(self):
    """
    _APITest_

    This is a commissioning test that has very little to do with
    anything except loading the code.  Exercises the BossAirAPI
    bookkeeping primitives: create, load by status/ID/WMBS, update,
    delete.
    """
    myThread = threading.currentThread()

    config = self.getConfig()
    baAPI = BossAirAPI(config=config)

    # We should have loaded a plugin
    self.assertTrue('TestPlugin' in baAPI.plugins)

    result = myThread.dbi.processData(
        "SELECT name FROM bl_status")[0].fetchall()
    statusList = []
    for i in result:
        # NOTE(review): row objects assumed to expose values()[0];
        # py3 dict views are not indexable — confirm row type.
        statusList.append(i.values()[0])

    # We should have the plugin states in the database.
    # FIX: the original compared the return values of list.sort(), which
    # are always None, so the assertion was vacuous.  Assert that each
    # plugin state is actually present instead.
    for pluginState in ['New', 'Dead', 'Gone']:
        self.assertIn(pluginState, statusList)

    # Create some jobs
    nJobs = 10
    jobDummies = self.createDummyJobs(nJobs=nJobs)

    baAPI.createNewJobs(wmbsJobs=jobDummies)

    runningJobs = baAPI._listRunJobs()
    self.assertEqual(len(runningJobs), nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nJobs)

    deadJobs = baAPI._loadByStatus(status='Dead')
    self.assertEqual(len(deadJobs), 0)

    # An unknown status must raise.
    self.assertRaises(BossAirException,
                      baAPI._loadByStatus, status='FalseStatus')

    # Change the job status and update it
    for job in newJobs:
        job['status'] = 'Dead'
    baAPI._updateJobs(jobs=newJobs)

    # Test whether we see the job status as updated
    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    deadJobs = baAPI._loadByStatus(status='Dead')
    self.assertEqual(len(deadJobs), nJobs)

    # Can we load by BossAir ID?
    loadedJobs = baAPI._loadByID(jobs=deadJobs)
    self.assertEqual(len(loadedJobs), nJobs)

    # Can we load via WMBS?
    loadedJobs = baAPI.loadByWMBS(wmbsJobs=jobDummies)
    self.assertEqual(len(loadedJobs), nJobs)

    # See if we can delete jobs
    baAPI._deleteJobs(jobs=deadJobs)

    # Confirm that they're gone
    deadJobs = baAPI._loadByStatus(status='Dead')
    self.assertEqual(len(deadJobs), 0)

    self.assertEqual(len(baAPI.jobs), 0)

    return
def testG_gLiteTest(self):
    """
    _gLiteTest_

    This test works on the gLitePlugin, checking all of its functions
    with a single set of jobs.
    """
    config = self.getConfig()
    config.BossAir.gliteConf = '/afs/cern.ch/cms/LCG/LCG-2/UI/conf/glite_wms_CERN.conf'
    config.BossAir.credentialDir = '/home/crab/ALL_SETUP/credentials/'
    config.BossAir.gLiteProcesses = 2
    config.BossAir.gLitePrefixEnv = "/lib64/"
    config.BossAir.pluginNames.append("gLitePlugin")
    config.BossAir.manualProxyPath = environ['X509_USER_PROXY']
    # Bypass myproxy logon for the test.
    config.Agent.serverDN = "/we/bypass/myproxy/logon"

    baAPI = BossAirAPI(config=config)

    nJobs = 2
    jobDummies = self.createDummyJobs(nJobs=nJobs,
                                      location='grid-ce-01.ba.infn.it')

    # Placeholder files; only their existence matters to the plugin.
    # (FIX: use context managers instead of bare open/close.)
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as f:
        f.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as f:
        f.write(' ')

    jobList = []

    userdn = executeCommand('grid-cert-info -subject -file %s'
                            % config.BossAir.manualProxyPath)
    newuser = self.daoFactory(classname="Users.New")
    newuser.execute(dn=userdn)

    for j in jobDummies:
        job = j  # {'id': j['id']}
        job['custom'] = {'location': 'grid-ce-01.ba.infn.it'}
        job['location'] = 'grid-ce-01.ba.infn.it'
        job['plugin'] = 'gLitePlugin'
        job['name'] = j['name']
        job['cache_dir'] = self.testDir
        job['retry_count'] = 0
        job['owner'] = userdn
        job['packageDir'] = self.testDir
        job['sandbox'] = sandbox
        job['priority'] = None
        jobList.append(job)

    baAPI.submit(jobs=jobList)

    # Should be new jobs
    # NOTE(review): assertNotEqual looks inverted for "should be new jobs";
    # possibly it accounts for asynchronous gLite submission — confirm.
    newJobs = baAPI._loadByStatus(status='New')
    self.assertNotEqual(len(newJobs), nJobs)

    time.sleep(2)

    baAPI.track()

    # Should be not anymore marked as new
    newJobs = baAPI._loadByStatus('New', 0)
    self.assertNotEqual(len(newJobs), nJobs)

    # Killing all the jobs
    baAPI.kill(jobList)

    baAPI.track()

    ## Issues running tests below due to glite delay on marking job as killed
    # Should be just running jobs
    #killedJobs = baAPI._loadByStatus('Cancelled by user', 0)
    #self.assertEqual(len(killedJobs), 0)

    # Check if they're complete
    #completeJobs = baAPI.getComplete()
    #self.assertEqual(len(completeJobs), nJobs)

    return
def testC_CondorTest(self):
    """
    _CondorTest_

    This test works on the SimpleCondorPlugin, checking all of its
    functions with a single set of jobs: submit, track, kill, resubmit,
    and removal handling.
    """
    # Require an empty schedd for this user so the counts are exact.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    # Get the config and set the removal time to -10 for testing
    # (jobs removed from condor are treated as expired immediately).
    config = self.getConfig()
    config.BossAir.removeTime = -10.0

    nJobs = 10
    jobDummies = self.createDummyJobs(nJobs=nJobs)

    baAPI = BossAirAPI(config=config, insertStates=True)

    # Placeholder files; only their existence matters.
    # (FIX: use context managers instead of bare open/close.)
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as f:
        f.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as f:
        f.write(' ')

    jobList = []
    for j in jobDummies:
        tmpJob = {'id': j['id']}
        tmpJob['custom'] = {'location': 'malpaquet'}
        tmpJob['name'] = j['name']
        tmpJob['cache_dir'] = self.testDir
        tmpJob['retry_count'] = 0
        tmpJob['plugin'] = 'SimpleCondorPlugin'
        tmpJob['owner'] = 'tapas'
        tmpJob['packageDir'] = self.testDir
        tmpJob['sandbox'] = sandbox
        tmpJob['priority'] = None
        tmpJob['usergroup'] = "wheel"
        tmpJob['userrole'] = 'cmsuser'
        jobList.append(tmpJob)

    info = {}
    # info['packageDir'] = self.testDir
    info['index'] = 0
    info['sandbox'] = sandbox

    baAPI.submit(jobs=jobList, info=info)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.track()

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), nJobs)

    # Do a second time to make sure that the cache
    # doesn't die on us
    baAPI.track()

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.kill(jobs=jobList)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # Try resubmission
    for j in jobList:
        j['retry_count'] = 1

    baAPI.submit(jobs=jobList, info=info)

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nJobs)

    # See where they are
    baAPI.track()

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), nJobs)

    # Now kill 'em manually
    command = ['condor_rm', self.user]
    pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
    pipe.communicate()

    # See what happened
    baAPI.track()

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), 0)

    # newJobs = baAPI._loadByStatus(status = 'Removed')
    # self.assertEqual(len(newJobs), nJobs)

    # Because removal time is -10.0, jobs should remove immediately
    baAPI.track()

    # Assert that jobs were listed as completed
    myThread = threading.currentThread()
    newJobs = baAPI._loadByStatus(status='Removed', complete='0')
    self.assertEqual(len(newJobs), nJobs)

    return
def testD_PrototypeChain(self):
    """
    _PrototypeChain_

    Prototype the BossAir workflow: submit jobs via JobSubmitterPoller,
    advance them with StatusPoller, let them time out due to the short
    Pending wait period, and verify JobTrackerPoller marks them failed.
    """
    # FIX: removed the dead assignments 'dummymyThread' and 'dummycacheDir'
    # whose values were never used.

    # Precondition: a clean schedd for this user.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'SimpleCondorPlugin'

    baAPI = BossAirAPI(config=config, insertStates=True)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10

    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site='se.T2_US_UCSD')
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobTracker = JobTrackerPoller(config=config)
    statusPoller = StatusPoller(config=config)

    jobSubmitter.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Check WMBS
    getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    statusPoller.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Tracker should do nothing
    jobTracker.algorithm()
    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    # Wait for jobs to timeout due to short Pending wait period
    time.sleep(12)

    statusPoller.algorithm()

    newJobs = baAPI._loadByStatus(status='Idle')
    self.assertEqual(len(newJobs), 0)

    newJobs = baAPI._loadByStatus(status='Timeout', complete='0')
    self.assertEqual(len(newJobs), nSubs * nJobs)

    # Jobs should be gone
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0)

    # Check if they're complete
    completeJobs = baAPI.getComplete()
    self.assertEqual(len(completeJobs), nSubs * nJobs)

    # Because they timed out, they all should have failed
    jobTracker.algorithm()

    result = getJobsAction.execute(state='Executing', jobType="Processing")
    self.assertEqual(len(result), 0)

    result = getJobsAction.execute(state='JobFailed', jobType="Processing")
    self.assertEqual(len(result), nSubs * nJobs)

    return
def testH_ARCTest(self):
    """
    _ARCTest_

    This test works on the ARCPlugin, checking all of its functions
    with a single set of jobs: submit, track, kill, resubmit, and a
    manual kill via the ng* command-line tools.
    """
    # Precondition: no ARC jobs already in flight for this user.
    nRunning = getNArcJobs()
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginNames.append("ARCPlugin")

    baAPI = BossAirAPI(config=config)

    nJobs = 2
    jobDummies = self.createDummyJobs(nJobs=nJobs, location='jade-cms.hip.fi')

    # Placeholder files; only their existence matters.
    # (FIX: use context managers instead of bare open/close.)
    jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
    with open(jobPackage, 'w') as f:
        f.write(' ')

    sandbox = os.path.join(self.testDir, 'sandbox.box')
    with open(sandbox, 'w') as f:
        f.write(' ')

    jobList = []
    for j in jobDummies:
        job = j  # {'id': j['id']}
        job['custom'] = {'location': 'jade-cms.hip.fi'}
        job['location'] = 'jade-cms.hip.fi'
        job['plugin'] = 'ARCPlugin'
        job['name'] = j['name']
        job['cache_dir'] = self.testDir
        job['retry_count'] = 0
        job['owner'] = 'edelmann'
        job['packageDir'] = self.testDir
        job['sandbox'] = sandbox
        job['priority'] = None
        jobList.append(job)

    baAPI.submit(jobs=jobList)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nJobs)

    baAPI.track()

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    rJobs = baAPI._listRunJobs()
    nOldJobs = 0
    for j in rJobs:
        if j['status'] != "New":
            nOldJobs += 1
    self.assertEqual(nOldJobs, nJobs)

    baAPI.kill(jobs=jobList)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, 0)

    # Try resubmission
    for j in jobList:
        j['retry_count'] = 1

    # FIX: the (succ, fail) return values were never used; drop the unpack.
    baAPI.submit(jobs=jobList)

    time.sleep(30)

    nRunning = getNArcJobs()
    self.assertEqual(nRunning, nJobs)

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), nJobs)

    # See where they are
    baAPI.track()

    newJobs = baAPI._loadByStatus(status='New')
    self.assertEqual(len(newJobs), 0)

    rJobs = baAPI._listRunJobs()
    nOldJobs = 0
    idStr = ""
    for j in rJobs:
        idStr += " " + j['gridid']
        if j['status'] != "New":
            nOldJobs += 1
    self.assertEqual(nOldJobs, nJobs)

    # Now kill 'em manually
    no_jobs = True
    while no_jobs:
        command = 'LD_LIBRARY_PATH=$CLEAN_LD_LIBRARY_PATH ngkill -t 180 -a'
        pipe = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
        output = pipe.communicate()[0]
        if output.find("Job information not found") >= 0:
            # It seems the jobs hasn't reached the ARC info.sys yet.
            # Sleep a while and try again
            time.sleep(20)
            continue
        else:
            no_jobs = False

    # Just to be sure, if the jobs were already finished, do a
    # 'ngclean' too.
    command = 'LD_LIBRARY_PATH=$CLEAN_LD_LIBRARY_PATH ngclean -t 180 -a'
    pipe = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
    output = pipe.communicate()[0]

    # Make sure the killing of the jobs reaches the info.sys.
    still_jobs = True
    while still_jobs:
        command = 'LD_LIBRARY_PATH=$CLEAN_LD_LIBRARY_PATH ngstat -t 180 ' + idStr
        pipe = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
        output = pipe.communicate()[0]
        if output.find("Job information not found") < 0:
            # It seems the killing of the jobs hasn't reached the ARC
            # info.sys yet.  Sleep a while and try again
            time.sleep(20)
            continue
        else:
            still_jobs = False

    # See what happened
    baAPI.track()

    idJobs = baAPI._loadByID(rJobs)
    nActiveJobs = 0
    nRemovedJobs = 0
    for j in idJobs:
        if j['status'] not in ["New", "KILLING", "KILLED", "LOST"]:
            nActiveJobs += 1
        if j['status'] in ["KILLING", "KILLED", "LOST"]:
            nRemovedJobs += 1
    self.assertEqual(nActiveJobs, 0)
    self.assertEqual(nRemovedJobs, nJobs)

    return
def testF_WMSMode(self):
    """
    _WMSMode_

    Try running things in WMS Mode.

    Submits 5 x 10 jobs with CondorPlugin in WMS mode and exercises
    updateSiteInformation() on the idle jobs.
    """
    # Precondition: the schedd must be empty for this user.
    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, 0,
                     "User currently has %i running jobs. Test will not continue" % (nRunning))

    config = self.getConfig()
    config.BossAir.pluginName = 'CondorPlugin'
    config.BossAir.submitWMSMode = True

    baAPI = BossAirAPI(config=config)

    workload = self.createTestWorkload()
    workloadName = "basicWorkload"
    changeState = ChangeState(config)

    nSubs = 5
    nJobs = 10

    # FIX: removed the dead assignment 'dummycacheDir'.
    jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                        task=workload.getTask("ReReco"),
                                        workloadSpec=os.path.join(self.testDir,
                                                                  'workloadTest',
                                                                  workloadName),
                                        site=None)
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')

    jobSubmitter = JobSubmitterPoller(config=config)
    jobSubmitter.algorithm()

    nRunning = getCondorRunningJobs(self.user)
    self.assertEqual(nRunning, nSubs * nJobs)

    baAPI.track()
    idleJobs = baAPI._loadByStatus(status='Idle')

    # Test the Site Info has been updated. Make sure T2_US_UCSD is not in
    # the sitelist in BossAir_t.py.
    sn = "T2_US_UCSD"
    baAPI.updateSiteInformation(idleJobs, sn, True)

    del jobSubmitter
    return