Beispiel #1
0
    def testE_FailJobs(self):
        """
        _FailJobs_

        Drive failed jobs through the ErrorHandlerPoller with FWJR reading
        enabled and check which state they end up in for three scenarios:
        a no-retry exit code, a negative maxFailTime and a pass exit code.
        """
        name = 'TestWorkload'
        self.createWorkload(workloadName=name)
        sandboxPkl = os.path.join(self.testDir, 'workloadTest', name,
                                  'WMSandbox', 'WMWorkload.pkl')
        reportPkl = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t",
                                 "fwjrs/badBackfillJobReport.pkl")

        # One group is created with an FWJR path, the other with none.
        withReport = self.createTestJobGroup(nJobs=self.nJobs,
                                             workloadPath=sandboxPkl,
                                             fwjrPath=reportPkl)
        withoutReport = self.createTestJobGroup(nJobs=self.nJobs,
                                                workloadPath=sandboxPkl,
                                                fwjrPath=None,
                                                fileModifier='bad')

        config = self.getConfig()
        config.ErrorHandler.readFWJR = True
        stateChanger = ChangeState(config)

        # (new state, old state) pairs walking a job from 'new' to 'jobfailed'
        failurePath = (('created', 'new'),
                       ('executing', 'created'),
                       ('complete', 'executing'),
                       ('jobfailed', 'complete'))

        def markFailed(group):
            """Push every job of the group through the full failure path."""
            for newState, oldState in failurePath:
                stateChanger.propagate(group.jobs, newState, oldState)

        def buildPoller():
            """Create a poller from the current config without a reqAuxDB."""
            poller = ErrorHandlerPoller(config)
            # set reqAuxDB None for the test
            poller.reqAuxDB = None
            return poller

        def checkCounts(*expected):
            """Assert the number of jobs found in each (state, count) pair."""
            for state, count in expected:
                self.assertEqual(len(self.getJobs.execute(state=state)), count)

        markFailed(withReport)
        markFailed(withoutReport)

        firstPoller = buildPoller()
        firstPoller.setup(None)
        firstPoller.exitCodesNoRetry = [8020]
        firstPoller.algorithm(None)

        # This should exhaust all jobs due to exit code
        # Except those with no fwjr
        checkCounts(('JobFailed', 0),
                    ('JobCooloff', self.nJobs),
                    ('Exhausted', self.nJobs))

        config.ErrorHandler.maxFailTime = -10
        secondPoller = buildPoller()
        markFailed(withReport)
        secondPoller.algorithm(None)

        # This should exhaust all jobs due to timeout
        checkCounts(('JobFailed', 0),
                    ('JobCooloff', self.nJobs),
                    ('Exhausted', self.nJobs))

        config.ErrorHandler.maxFailTime = 24 * 3600
        config.ErrorHandler.passExitCodes = [8020]
        thirdPoller = buildPoller()
        markFailed(withReport)
        thirdPoller.algorithm(None)

        # This should pass all jobs due to exit code
        checkCounts(('Created', self.nJobs))
Beispiel #2
0
    def testE_FailJobs(self):
        """
        _FailJobs_

        Exercise the ErrorHandlerPoller on jobs in 'jobfailed' while it reads
        the FWJR, and verify the resulting job states after each of three
        polling cycles run with different error handler settings.
        """
        wfName = 'TestWorkload'
        self.createWorkload(workloadName=wfName)

        pathToWorkload = os.path.join(self.testDir, 'workloadTest', wfName,
                                      'WMSandbox', 'WMWorkload.pkl')
        pathToReport = os.path.join(WMCore.WMBase.getTestBase(),
                                    "WMComponent_t/JobAccountant_t",
                                    "fwjrs/badBackfillJobReport.pkl")

        reportedGroup = self.createTestJobGroup(nJobs=self.nJobs,
                                                workloadPath=pathToWorkload,
                                                fwjrPath=pathToReport)
        unreportedGroup = self.createTestJobGroup(nJobs=self.nJobs,
                                                  workloadPath=pathToWorkload,
                                                  fwjrPath=None,
                                                  fileModifier='bad')

        config = self.getConfig()
        config.ErrorHandler.readFWJR = True
        changer = ChangeState(config)

        # Ordered (old state, new state) hops that end in 'jobfailed'
        hops = [('new', 'created'),
                ('created', 'executing'),
                ('executing', 'complete'),
                ('complete', 'jobfailed')]
        # States checked after the two exhausting cycles
        exhaustedStates = ('JobCooloff', 'Exhausted')

        # --- Cycle 1: exit code 8020 is flagged as not retriable ---
        for group in (reportedGroup, unreportedGroup):
            for source, target in hops:
                changer.propagate(group.jobs, target, source)

        handler = ErrorHandlerPoller(config)
        # set reqAuxDB None for the test
        handler.reqAuxDB = None
        handler.setup(None)
        handler.exitCodesNoRetry = [8020]
        handler.algorithm(None)

        # This should exhaust all jobs due to exit code
        # Except those with no fwjr
        stillFailed = self.getJobs.execute(state='JobFailed')
        self.assertEqual(len(stillFailed), 0)
        for stateName in exhaustedStates:
            found = self.getJobs.execute(state=stateName)
            self.assertEqual(len(found), self.nJobs)

        # --- Cycle 2: negative maxFailTime ---
        config.ErrorHandler.maxFailTime = -10
        handler = ErrorHandlerPoller(config)
        # set reqAuxDB None for the test
        handler.reqAuxDB = None

        for source, target in hops:
            changer.propagate(reportedGroup.jobs, target, source)
        handler.algorithm(None)

        # This should exhaust all jobs due to timeout
        stillFailed = self.getJobs.execute(state='JobFailed')
        self.assertEqual(len(stillFailed), 0)
        for stateName in exhaustedStates:
            found = self.getJobs.execute(state=stateName)
            self.assertEqual(len(found), self.nJobs)

        # --- Cycle 3: exit code 8020 is listed as a pass exit code ---
        config.ErrorHandler.maxFailTime = 24 * 3600
        config.ErrorHandler.passExitCodes = [8020]
        handler = ErrorHandlerPoller(config)
        # set reqAuxDB None for the test
        handler.reqAuxDB = None

        for source, target in hops:
            changer.propagate(reportedGroup.jobs, target, source)
        handler.algorithm(None)

        # This should pass all jobs due to exit code
        recreated = self.getJobs.execute(state='Created')
        self.assertEqual(len(recreated), self.nJobs)