def checkStatus(self):
    """
    _checkStatus_

    Run the BossAir track() function (self-contained)
    and then check for jobs that have timed out.

    A job is considered timed out when the time elapsed since its
    'status_time' exceeds the value configured in self.timeouts for its
    'globalState' (a missing 'globalState' is treated as 'Error').
    Timed-out jobs get their 'status' set to 'Timeout', are passed to
    bossAir.update() and are then killed through bossAir.kill(), grouped
    by the state they timed out in, all inside one thread transaction.

    Returns None in every case.
    """
    runningJobs = self.bossAir.track()
    if not runningJobs:
        # Then we have no jobs
        return

    if not self.timeouts:
        # Then we've set ourselves to have no timeouts
        # Get out and stay out
        return

    # Jobs that need to be killed, keyed by the state they timed out in
    jobsToKill = defaultdict(list)

    # Take the reference time once so every job is judged against the same instant
    now = time.time()
    for job in runningJobs:
        globalState = job.get('globalState', 'Error')
        statusTime = job.get('status_time', None)
        timeout = self.timeouts.get(globalState, None)
        if statusTime == 0:
            logging.error("Not killing job %i, the status time was zero", job['id'])
            continue
        if not (timeout and statusTime):
            # No timeout configured for this state, or no status time available
            continue
        if now - float(statusTime) > float(timeout):
            # Timeout status is used by JobTracker to fail jobs in WMBS database
            logging.info("Killing job %i because it has exceeded timeout for status '%s'",
                         job['id'], globalState)
            job['status'] = 'Timeout'
            jobsToKill[globalState].append(job)

    if not jobsToKill:
        # Nothing timed out, so there is no reason to open an empty transaction
        return

    timeOutCodeMap = {"Running": 71304, "Pending": 71305, "Error": 71306}

    # We need to show that the jobs are in state timeout
    # and then kill them.
    jobsToKillList = flattenList(jobsToKill.values())
    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    myThread.transaction.begin()
    self.bossAir.update(jobs=jobsToKillList)
    for preJobStatus, timedOutJobs in jobsToKill.items():
        # it shouldn't have 71307 (states should be among Running, Pending, Error)
        eCode = timeOutCodeMap.get(preJobStatus, 71307)
        self.bossAir.kill(jobs=timedOutJobs,
                          killMsg=WM_JOB_ERROR_CODES[eCode],
                          errorCode=eCode)
    myThread.transaction.commit()

    return
def testFlattenList(self):
    """
    Check flattenList, which returns a flat list out of a list of lists
    """
    # Arrange: three ranges holding 3 + 1 + 3 = 7 distinct values
    nestedInput = [range(1, 4), range(10, 11), range(15, 18)]
    expectedItems = {1, 2, 3, 10, 15, 16, 17}

    # Act
    flattened = flattenList(nestedInput)

    # Assert: nothing dropped, nothing duplicated, nothing unexpected
    self.assertEqual(len(flattened), 7)
    self.assertEqual(set(flattened), expectedItems)
def checkStatus(self):
    """
    _checkStatus_

    Run the BossAir track() function (self-contained)
    and then check for jobs that have timed out.

    A job is considered timed out when the time elapsed since its
    'status_time' exceeds the value configured in self.timeouts for its
    'globalState' (a missing 'globalState' is treated as 'Error').
    Timed-out jobs get their 'status' set to 'Timeout', are passed to
    bossAir.update() and are then killed through bossAir.kill(), grouped
    by the state they timed out in, all inside one thread transaction.

    Returns None in every case.
    """
    runningJobs = self.bossAir.track()
    if not runningJobs:
        # Then we have no jobs
        return

    if not self.timeouts:
        # Then we've set ourselves to have no timeouts
        # Get out and stay out
        return

    # Jobs that need to be killed, keyed by the state they timed out in
    jobsToKill = defaultdict(list)

    # Take the reference time once so every job is judged against the same instant
    now = time.time()
    for job in runningJobs:
        globalState = job.get('globalState', 'Error')
        statusTime = job.get('status_time', None)
        timeout = self.timeouts.get(globalState, None)
        if statusTime == 0:
            logging.error("Not killing job %i, the status time was zero", job['id'])
            continue
        if not (timeout and statusTime):
            # No timeout configured for this state, or no status time available
            continue
        if now - float(statusTime) > float(timeout):
            # Timeout status is used by JobTracker to fail jobs in WMBS database
            logging.info("Killing job %i because it has exceeded timeout for status '%s'",
                         job['id'], globalState)
            job['status'] = 'Timeout'
            jobsToKill[globalState].append(job)

    if not jobsToKill:
        # Nothing timed out, so there is no reason to open an empty transaction
        return

    timeOutCodeMap = {"Running": 71304, "Pending": 71305, "Error": 71306}

    # We need to show that the jobs are in state timeout
    # and then kill them.
    jobsToKillList = flattenList(jobsToKill.values())
    # current_thread() replaces the deprecated currentThread() alias
    myThread = threading.current_thread()
    myThread.transaction.begin()
    self.bossAir.update(jobs=jobsToKillList)
    for preJobStatus, timedOutJobs in jobsToKill.items():
        # it shouldn't have 71307 (states should be among Running, Pending, Error)
        eCode = timeOutCodeMap.get(preJobStatus, 71307)
        self.bossAir.kill(jobs=timedOutJobs,
                          killMsg=WM_JOB_ERROR_CODES[eCode],
                          errorCode=eCode)
    myThread.transaction.commit()

    return