コード例 #1
0
ファイル: StatusPoller.py プロジェクト: johnhcasallasl/WMCore
    def checkStatus(self):
        """
        _checkStatus_

        Run the BossAir track() function (self-contained)
        and then check for jobs that have timed out.

        A job counts as timed out when the time elapsed since its
        'status_time' exceeds the value configured in self.timeouts for
        the job's 'globalState'. Such jobs get their 'status' set to
        'Timeout', are handed to self.bossAir.update(), and are then
        killed through self.bossAir.kill() with an error code chosen by
        the state they were in when they timed out.

        Nothing is done (and no transaction is opened) when track()
        reports no jobs or when no timeouts are configured.

        :return: None
        """

        runningJobs = self.bossAir.track()

        if not runningJobs:
            # Then we have no jobs (an empty or missing result)
            return

        if not self.timeouts:
            # Then we've set ourselves to have no timeouts
            # Get out and stay out
            return

        # Jobs that need to be killed, grouped by the state they were in
        # when the timeout was detected
        jobsToKill = defaultdict(list)

        # Now check for timeouts
        for job in runningJobs:
            # Jobs without a 'globalState' are treated as being in 'Error'
            globalState = job.get('globalState', 'Error')
            statusTime = job.get('status_time', None)
            timeout = self.timeouts.get(globalState, None)
            if statusTime == 0:
                # A zero status time would make every job look ancient,
                # so such jobs are never killed here
                logging.error("Not killing job %i, the status time was zero",
                              job['id'])
                continue
            if not (timeout and statusTime):
                # No timeout configured for this state, or no usable time
                continue
            if time.time() - float(statusTime) > float(timeout):
                # Timeout status is used by JobTracker to fail jobs in WMBS database
                logging.info(
                    "Killing job %i because it has exceeded timeout for status '%s'",
                    job['id'], globalState)
                job['status'] = 'Timeout'
                jobsToKill[globalState].append(job)

        timeOutCodeMap = {"Running": 71304, "Pending": 71305, "Error": 71306}
        # We need to show that the jobs are in state timeout
        # and then kill them.
        jobsToKillList = flattenList(jobsToKill.values())
        # current_thread() is the supported spelling; the camelCase
        # currentThread() alias is deprecated since Python 3.10
        myThread = threading.current_thread()
        # NOTE(review): there is no rollback here if update()/kill() raise;
        # presumably the caller takes care of that -- confirm.
        myThread.transaction.begin()
        self.bossAir.update(jobs=jobsToKillList)
        for preJobStatus, timedOutJobs in jobsToKill.items():
            # it shouldn't have 71307 (states should be among Running, Pending, Error)
            eCode = timeOutCodeMap.get(preJobStatus, 71307)
            self.bossAir.kill(jobs=timedOutJobs,
                              killMsg=WM_JOB_ERROR_CODES[eCode],
                              errorCode=eCode)
        myThread.transaction.commit()

        return
コード例 #2
0
ファイル: IterTools_t.py プロジェクト: prozober/WMCore
 def testFlattenList(self):
     """
     Check that flattenList collapses a list of ranges into one flat
     list that holds every element of every range.
     """
     nested = [range(1, 4), range(10, 11), range(15, 18)]
     expected = {1, 2, 3, 10, 15, 16, 17}

     flattened = flattenList(nested)

     # The length check guards against duplicated elements, which the
     # set comparison alone would not catch.
     self.assertEqual(len(flattened), 7)
     self.assertEqual(set(flattened), expected)
コード例 #3
0
 def testFlattenList(self):
     """
     flattenList should turn a list of sequences into a single flat list.
     """
     pieces = [range(1, 4), range(10, 11), range(15, 18)]
     merged = flattenList(pieces)
     # 3 + 1 + 3 elements are expected in total
     self.assertEqual(len(merged), 7)
     self.assertEqual(set(merged), {1, 2, 3, 10, 15, 16, 17})
コード例 #4
0
ファイル: StatusPoller.py プロジェクト: BrunoCoimbra/WMCore
    def checkStatus(self):
        """
        _checkStatus_

        Run the BossAir track() function (self-contained)
        and then check for jobs that have timed out.

        For every job returned by track(), the time since its
        'status_time' is compared with the timeout that self.timeouts
        holds for the job's 'globalState'. Jobs over the limit are
        flagged with status 'Timeout', passed to self.bossAir.update(),
        and killed via self.bossAir.kill() using a per-state error code.

        Returns None. Returns early, before any transaction is started,
        if there are no tracked jobs or no timeouts are configured.
        """

        runningJobs = self.bossAir.track()

        if not runningJobs:
            # Then we have no jobs (empty or missing result)
            return

        if not self.timeouts:
            # Then we've set ourselves to have no timeouts
            # Get out and stay out
            return

        # Look for jobs that need to be killed; keyed by the state the
        # job was in when it timed out
        jobsToKill = defaultdict(list)

        # Now check for timeouts
        for job in runningJobs:
            # A job with no 'globalState' falls back to 'Error'
            globalState = job.get('globalState', 'Error')
            statusTime = job.get('status_time', None)
            timeout = self.timeouts.get(globalState, None)
            if statusTime == 0:
                # Zero is logged and skipped rather than treated as a timeout
                logging.error("Not killing job %i, the status time was zero", job['id'])
                continue
            if timeout and statusTime:
                elapsed = time.time() - float(statusTime)
                if elapsed > float(timeout):
                    # Timeout status is used by JobTracker to fail jobs in WMBS database
                    logging.info("Killing job %i because it has exceeded timeout for status '%s'",
                                 job['id'], globalState)
                    job['status'] = 'Timeout'
                    jobsToKill[globalState].append(job)

        timeOutCodeMap = {"Running": 71304, "Pending": 71305, "Error": 71306}
        # We need to show that the jobs are in state timeout
        # and then kill them.
        jobsToKillList = flattenList(jobsToKill.values())
        # threading.currentThread() is a deprecated alias (Python 3.10+);
        # current_thread() returns the same thread object
        myThread = threading.current_thread()
        # NOTE(review): begin()/commit() are not paired with a rollback in
        # this method; presumably error handling lives in the caller -- verify.
        myThread.transaction.begin()
        self.bossAir.update(jobs=jobsToKillList)
        for preJobStatus in jobsToKill:
            # it shouldn't have 71307 (states should be among Running, Pending, Error)
            eCode = timeOutCodeMap.get(preJobStatus, 71307)
            self.bossAir.kill(jobs=jobsToKill[preJobStatus],
                              killMsg=WM_JOB_ERROR_CODES[eCode],
                              errorCode=eCode)
        myThread.transaction.commit()

        return