Example #1
    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Check if jobs without a possible site to run at go to SubmitFailed
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = [],
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config = config)
        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Example #2
    def testF_OverloadTest(self):
        """
        _OverloadTest_
        
        Test what happens if you put in more jobs
        than the sites can handle
        """

        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertThreshold(siteName=site, taskType="Silly", maxSlots=1)

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        workloadName = "basicWorkload"
        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            type="Silly",
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        # Should be one job for each site
        nSites = len(self.sites)
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSites)

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Executing", jobType="Silly")
        self.assertEqual(len(result), nSites)
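        # The rest stay in Created: with one "Silly" slot per site, only
        # len(self.sites) jobs can be submitted per cycle, which leaves
        # nJobs * nSubs - nSites jobs waiting for free slots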
        result = getJobsAction.execute(state="Created", jobType="Silly")
        self.assertEqual(len(result), nJobs * nSubs - nSites)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        del jobSubmitter

        return
Example #3
    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Check if jobs without a possible site to run at go to SubmitFailed
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            site=[],
                                            workloadSpec=self.workloadSpecPath)

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='SubmitFailed',
                                       jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Example #4
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI  = BossAirAPI(config = config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')


        jobSubmitter = JobSubmitterPoller(config = config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status = 'Idle')
        sn = "T2_US_UCSD"

        # Test that the site info has been updated. Make sure T2_US_UCSD is not
        # in the sitelist in BossAir_t.py
        baAPI.updateSiteInformation(idleJobs, sn, True)

        # Now kill 'em manually
        # command = ['condor_rm', self.user]
        # pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        # pipe.communicate()

        del jobSubmitter

        return
Example #5
    def testCaching(self):
        """
        _testCaching_

        Verify that JobSubmitter caching works.
        """
        mySubmitterPoller = JobSubmitterPoller(self.createConfig())
        mySubmitterPoller.refreshCache()

        self.assertEqual(len(mySubmitterPoller.cachedJobIDs), 0,
                         "Error: The job cache should be empty.")

        self.injectJobs()
        mySubmitterPoller.refreshCache()
        
        # Verify the cache is full
        self.assertEqual(len(mySubmitterPoller.cachedJobIDs), 20,
                         "Error: The job cache should contain 20 jobs.")        

        killWorkflow("wf001")
        mySubmitterPoller.refreshCache()
        
        # Verify that the workflow is gone from the cache
        self.assertEqual(len(mySubmitterPoller.cachedJobIDs), 10,
                         "Error: The job cache should contain 10 jobs.")        

        killWorkflow("wf002")
        mySubmitterPoller.refreshCache()
        
        # Verify that the workflow is gone from the cache
        self.assertEqual(len(mySubmitterPoller.cachedJobIDs), 0,
                         "Error: The job cache should be empty.")
        return
Example #6
    def testT_updateJobInfo(self):
        """
        _updateJobInfo_

        Test the updateSiteInformation method from PyCondorPlugin.py
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'PyCondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)
        workload = self.createTestWorkload()
        workloadName = "basicWorkload"
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 2
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site="se.T2_US_UCSD")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')

        ##
        # Mark one of the sites in the sitelist as ABORTED/DRAINING/DOWN.
        # The updateSiteInformation() method should edit the classAd for all
        # jobs bound for that site.
        # Check the queue manually using condor_q -l <job id>
        #
        jtok = baAPI.updateSiteInformation(idleJobs, "T2_US_UCSD", True)
        if jtok is not None:
            # errorCode can be either 61301/61302/61303 (Aborted/Draining/Down)
            baAPI.kill(jtok, errorCode=61301)

        return
Example #7
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site=None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        # Now kill 'em manually
        command = ['condor_rm', self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        del jobSubmitter

        return
Example #8
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'PyCondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site=None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')

        baAPI.kill(jobs=idleJobs)

        del jobSubmitter

        return
Example #9
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'PyCondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI  = BossAirAPI(config = config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')


        jobSubmitter = JobSubmitterPoller(config = config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status = 'Idle')

        baAPI.kill(jobs = idleJobs)

        del jobSubmitter

        return
Example #10
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI  = BossAirAPI(config = config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')


        jobSubmitter = JobSubmitterPoller(config = config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        # Now kill 'em manually
        command = ['condor_rm', self.user]
        pipe = Popen(command, stdout = PIPE, stderr = PIPE, shell = False)
        pipe.communicate()

        del jobSubmitter

        return
Example #11
    def testT_updateJobInfo(self):
        """
        _updateJobInfo_

        Test the updateSiteInformation method from CondorPlugin.py
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI  = BossAirAPI(config=config)
        workload = self.createTestWorkload()
        workloadName = "basicWorkload"
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 2
        dummycacheDir = os.path.join(self.testDir, 'CacheDir')
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(self.testDir,
                                                                      'workloadTest',
                                                                      workloadName),
                                            site="se.T2_US_UCSD")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')

        ##
        # Mark one of the sites in the sitelist as ABORTED/DRAINING/DOWN.
        # The updateSiteInformation() method should edit the classAd for all
        # jobs bound for that site.
        # Check the queue manually using condor_q -l <job id>
        #
        jtok = baAPI.updateSiteInformation(idleJobs, "T2_US_UCSD", True)
        if jtok is not None:
            baAPI.kill(jtok, errorCode=71301)  # errorCode can be either 71301/71302/71303 (Aborted/Draining/Down)

        return
Example #12
    def testJobSiteDrain(self):
        """
        _testJobSiteDrain_

        Test the behavior of jobs pending to a single site that is in drain mode
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        jobSubmitter = JobSubmitterPoller(config=config)
        myResourceControl = ResourceControl(config)
        changeState = ChangeState(config)
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

        nSubs = 1
        nJobs = 30

        site = 'T2_US_Nebraska'
        self.setResourceThresholds(site, pendingSlots=100, runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 10, 'runningSlots': 10},
                                   Merge={'pendingSlots': 10, 'runningSlots': 10, 'priority': 5})

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            site=[site],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # submit first 10 jobs
        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 10)

        myResourceControl.changeSiteState(site, 'Draining')

        # site is now in drain, so don't submit anything
        jobSubmitter.algorithm()

        # jobs were supposed to get killed, but the MockPlugin doesn't actually do anything
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state='created', jobType="Processing")
        self.assertEqual(len(result), 20)
        result = getJobsAction.execute(state='submitfailed', jobType="Processing")
        self.assertEqual(len(result), 0)

        # make sure the drain grace period expires...
        time.sleep(3)
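        # (the test configuration presumably sets a drain grace period shorter
        # than 3 seconds, so the sleep above is enough for it to expire)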
        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 10)
        # the remaining jobs should have gone to submitfailed by now
        result = getJobsAction.execute(state='submitfailed', jobType="Processing")
        self.assertEqual(len(result), 20)
        result = getJobsAction.execute(state='created', jobType="Processing")
        self.assertEqual(len(result), 0)
Example #14
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Submit a lot of jobs and test how long it takes for
        them to actually be submitted
        """

        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 20000, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10000, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10000, 'runningSlots' :-1, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')

        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge'))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Actually run it
        startTime = time.time()
        cProfile.runctx("jobSubmitter.algorithm()", globals(), locals(), filename = "testStats.stat")
        stopTime = time.time()


        print "Job took %f seconds to complete" % (stopTime - startTime)

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
Example #15
    def testCaching(self):
        """
        _testCaching_

        Verify that JobSubmitter caching works.
        """
        config = self.createConfig()
        mySubmitterPoller = JobSubmitterPoller(config)
        mySubmitterPoller.refreshCache()

        self.assertEqual(len(mySubmitterPoller.cachedJobIDs), 0,
                         "Error: The job cache should be empty.")

        self.injectJobs()
        mySubmitterPoller.refreshCache()

        # Verify the cache is full
        self.assertEqual(
            len(mySubmitterPoller.cachedJobIDs), 20,
            "Error: The job cache should contain 20 jobs.  Contains: %i" %
            len(mySubmitterPoller.cachedJobIDs))

        killWorkflow("wf001", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(
            len(mySubmitterPoller.cachedJobIDs), 10,
            "Error: The job cache should contain 10 jobs. Contains: %i" %
            len(mySubmitterPoller.cachedJobIDs))

        killWorkflow("wf002", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(
            len(mySubmitterPoller.cachedJobIDs), 0,
            "Error: The job cache should be empty.  Contains: %i" %
            len(mySubmitterPoller.cachedJobIDs))
        return
Example #16
    def preInitialization(self):
        """
        Setup the worker thread for jobSubmitter

        """
        logging.info("JobSubmitter.preInitialization")

        # Add event loop to worker manager
        myThread = threading.currentThread()

        pollInterval = self.config.JobSubmitter.pollInterval
        logging.info("Setting poll interval to %s seconds", pollInterval)
        myThread.workerThreadManager.addWorker(JobSubmitterPoller(self.config),
                                               pollInterval)

        return
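
For context, a minimal sketch of the configuration fragment that preInitialization() reads. Only JobSubmitter.pollInterval itself appears in the code above; the component_() call is standard WMCore.Configuration usage and the 120-second value is an arbitrary assumption:

from WMCore.Configuration import Configuration

config = Configuration()
config.component_("JobSubmitter")
# Seconds between JobSubmitterPoller cycles, passed to addWorker() above
config.JobSubmitter.pollInterval = 120  # assumed value, not from the code above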
Example #17
    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Test the behavior of the submitter in response to the different
        states of the sites
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        nSubs = 1
        nJobs = 20

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL']
        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 10, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' :-1, 'priority' : 5})

        myResourceControl = ResourceControl()
        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')
        # First test that we prefer Normal over drain, and T1 over T2/T3
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter = JobSubmitterPoller(config = config)
        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertNotEqual(loc, 'T2_US_Florida')

        # Now set everything to down, check we don't submit anything
        for site in sites:
            myResourceControl.changeSiteState(site, 'Down')
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter.algorithm()
        # Nothing is submitted despite the empty slots at Uniandes and Florida
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since they can only run at one place
        for site in sites:
            myResourceControl.changeSiteState(site, 'Aborted')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs = nSubsMerge, nJobs = nJobsMerge,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            taskType = 'Merge')

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state = 'SubmitFailed', jobType = 'Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)
        result = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(result), nSubs * nJobs)

        return
Example #18
    def testD_WhiteListBlackList(self):
        """
        _testD_WhiteListBlackList_

        Test the whitelist/blacklist implementation
        Trust the jobCreator to have set this correctly in the job
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T2_CH_CERN', 'T3_CO_Uniandes']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 1000, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 5000, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 1000, 'runningSlots' :-1, 'priority' : 5})

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = 'se.%s' % sites[-1],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            bl = sites[:-1])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config = config)

        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at T3_CO_Uniandes
        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertEqual(loc, 'T3_CO_Uniandes')

        # Run again and test the whiteList
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = 'se.%s' % 'T2_CH_CERN',
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            wl = ['T2_CH_CERN'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Run it
        jobSubmitter.algorithm()

        # You'll have jobs from the previous run still in the database
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        # The new jobs should all be at CERN (the jobs from the first run stay at Uniandes)
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict[nSubs * nJobs:]:
            loc = entry['site_name']
            self.assertEqual(loc, 'T2_CH_CERN')

        # Run again with an invalid whitelist
        # After this point, the original two sets of jobs will be executing
        # The rest of the jobs should move to submitFailed
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = 'se.%s' % 'T2_CH_CERN',
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            wl = ['T2_US_Namibia'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # The invalid-whitelist jobs should have gone to SubmitFailed
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Run again with all sites blacklisted
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = ['se.%s' % x for x in sites],
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            bl = sites)

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        return
Example #19
    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 10, runningSlots = -1, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 50, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' :-1, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'OldestWorkflow')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Create a newer processing workflow, and afterwards some new jobs for an old workflow

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'NewestWorkflow')

        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                    task = workload.getTask("ReReco"),
                                    workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                workloadName),
                                    site = 'se.%s' % 'T1_US_FNAL',
                                    name = 'OldestWorkflow'))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Move pending jobs to running

        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        for idx in range(2):
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state = 'Created', jobType = "Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
            self.assertEqual(len(result), 10)
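            # 30 Processing jobs exist in total (10 per createJobGroups call for
            # OldestWorkflow, NewestWorkflow and OldestWorkflow again); each
            # cycle submits another batch of 10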
            result = getJobsAction.execute(state = 'Created', jobType = "Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(classname = "Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow['name'], 'OldestWorkflow')

        return
Example #20
    def testCaching(self):
        """
        _testCaching_

        Verify that JobSubmitter caching works.
        """
        config = self.createConfig()
        mySubmitterPoller = JobSubmitterPoller(config)
        mySubmitterPoller.getThresholds()
        mySubmitterPoller.refreshCache()

        self.assertEqual(len(mySubmitterPoller.jobDataCache), 0,
                         "Error: The job cache should be empty.")

        self.injectJobs()
        mySubmitterPoller.refreshCache()

        # Verify the cache is full
        self.assertEqual(len(mySubmitterPoller.jobDataCache), 20,
                         "Error: The job cache should contain 20 jobs.  Contains: %i" % len(
                             mySubmitterPoller.jobDataCache))

        killWorkflow("wf001", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(len(mySubmitterPoller.jobDataCache), 10,
                         "Error: The job cache should contain 10 jobs. Contains: %i" % len(
                             mySubmitterPoller.jobDataCache))

        killWorkflow("wf002", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(len(mySubmitterPoller.jobDataCache), 0,
                         "Error: The job cache should be empty.  Contains: %i" % len(mySubmitterPoller.jobDataCache))
        return
Example #21
    def testMemoryProfile(self):
        """
        _testMemoryProfile_

        Creates 20k jobs and keeps refreshing the cache and submitting
        them between the component cycles

        Example using memory_profiler library, unfortunately the source
        code has to be updated with decorators.
        NOTE: Never run it on jenkins
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        # myResourceControl = ResourceControl(config)

        nSubs = 20
        nJobs = 100

        sites = ['T2_US_Florida', 'T2_RU_INR', 'T3_CO_Uniandes', 'T1_US_FNAL']
        allSites = CRIC().PSNtoPNNMap('*')

        for site in allSites:
            self.setResourceThresholds(site, pendingSlots=20000, runningSlots=999999, tasks=['Processing', 'Merge'],
                                       Processing={'pendingSlots': 10000, 'runningSlots': 999999},
                                       Merge={'pendingSlots': 10000, 'runningSlots': 999999, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)

        # Actually run it
        jobSubmitter.algorithm()  # cycle 1

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T2_US_Florida', 'Draining')
        jobSubmitter.algorithm()  # cycle 2

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T2_RU_INR', 'Draining')
        jobSubmitter.algorithm()  # cycle 3

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T3_CO_Uniandes', 'Draining')
        jobSubmitter.algorithm()  # cycle 4

        # myResourceControl.changeSiteState('T2_RU_INR', 'Normal')
        jobSubmitter.algorithm()  # cycle 5

        # myResourceControl.changeSiteState('T2_US_Florida', 'Normal')
        jobSubmitter.algorithm()  # cycle 6

        # myResourceControl.changeSiteState('T2_RU_INR', 'Normal')
        jobSubmitter.algorithm()  # cycle 7

        # myResourceControl.changeSiteState('T3_CO_Uniandes', 'Normal')
        jobSubmitter.algorithm()  # cycle 8
        jobSubmitter.algorithm()  # cycle 9, nothing to submit

        return
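
A minimal sketch of the memory_profiler usage the docstring refers to, assuming the memory_profiler package is installed; algorithm() below is a hypothetical stand-in for the real method (e.g. JobSubmitterPoller.algorithm()) that would have to be decorated by hand:

from memory_profiler import profile

@profile
def algorithm():
    # Hypothetical stand-in: allocate something measurable so the
    # line-by-line memory report has visible increments
    cache = [{'jobid': i} for i in range(100000)]
    return len(cache)

if __name__ == '__main__':
    algorithm()  # prints the per-line memory report for the decorated function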
Example #22
    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Test the behavior of the submitter in response to the different
        states of the sites
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        nSubs = 1
        nJobs = 20

        sites = [
            'T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL'
        ]
        for site in sites:
            self.setResourceThresholds(site,
                                       pendingSlots=10,
                                       runningSlots=-1,
                                       tasks=['Processing', 'Merge'],
                                       Processing={
                                           'pendingSlots': 10,
                                           'runningSlots': -1
                                       },
                                       Merge={
                                           'pendingSlots': 10,
                                           'runningSlots': -1,
                                           'priority': 5
                                       })

        myResourceControl = ResourceControl(config)
        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')
        # First test that we prefer Normal over drain, and T1 over T2/T3
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            site=[x for x in sites],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter = JobSubmitterPoller(config=config)
        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        locationDict = getLocationAction.execute([{
            'jobid': x
        } for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertNotEqual(loc, 'T2_US_Florida')

        # Now set everything to down, check we don't submit anything
        for site in sites:
            myResourceControl.changeSiteState(site, 'Down')
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            site=[x for x in sites],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter.algorithm()
        # Nothing is submitted despite the empty slots at Uniandes and Florida
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now set everything to Drain and create Merge jobs. Those should be submitted
        for site in sites:
            myResourceControl.changeSiteState(site, 'Draining')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs=nSubsMerge,
                                            nJobs=nJobsMerge,
                                            site=[x for x in sites],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            taskType='Merge')

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since they can only run at one place
        for site in sites:
            myResourceControl.changeSiteState(site, 'Aborted')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs=nSubsMerge,
                                            nJobs=nJobsMerge,
                                            site=[x for x in sites],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            taskType='Merge')

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='SubmitFailed', jobType='Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)
        result = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(result), nSubs * nJobs)

        return
Example #23
    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(site,
                                   pendingSlots=10,
                                   runningSlots=10000,
                                   tasks=['Processing', 'Merge'],
                                   Processing={
                                       'pendingSlots': 50,
                                       'runningSlots': 10000
                                   },
                                   Merge={
                                       'pendingSlots': 10,
                                       'runningSlots': 10000,
                                       'priority': 5
                                   })

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='OldestWorkflow')
        jobGroupList.extend(
            self.createJobGroups(nSubs=nSubs,
                                 nJobs=nJobs,
                                 task=workload.getTask("ReReco"),
                                 workloadSpec=self.workloadSpecPath,
                                 site=site,
                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Create a newer processing workflow, and afterwards some new jobs for an old workflow

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='OldestWorkflow')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='NewestWorkflow')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Move pending jobs to running

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for idx in range(2):
            result = getJobsAction.execute(state='Executing')
            binds = []
            for jobId in result:
                binds.append({'id': jobId, 'retry_count': 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state='Created', jobType="Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state='Executing', jobType="Merge")
            self.assertEqual(len(result), 10)
            result = getJobsAction.execute(state='Created',
                                           jobType="Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state='Executing',
                                           jobType="Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(
                classname="Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow['name'], 'OldestWorkflow')

        return
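The pending-to-running promotion inside the loop above (load BossAir run-job IDs by WMBS ID, then flip their status) reappears in later examples; factored into a helper it would read roughly as follows (hypothetical function, assuming the same baDaoFactory DAOs):

def promoteExecutingToRunning(testCase, getJobsAction):
    """Mark every WMBS job currently 'Executing' as 'Running' in BossAir."""
    getRunJobID = testCase.baDaoFactory(classname="LoadByWMBSID")
    setRunJobStatus = testCase.baDaoFactory(classname="SetStatus")
    binds = [{'id': jobId, 'retry_count': 0}
             for jobId in getJobsAction.execute(state='Executing')]
    runJobIds = getRunJobID.execute(binds)
    setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')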
Exemple #24
    def testD_CreamCETest(self):
        """
        _CreamCETest_

        This is for submitting to Cream CEs.  Don't use it.
        """

        return  # test disabled on purpose; everything below is unreachable and kept for reference

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        workloadName = "basicWorkload"

        myThread = threading.currentThread()

        workload = self.createTestWorkload()

        config = self.getConfig()
        config.JobSubmitter.pluginName = "CreamPlugin"

        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        # Add a new site
        siteName = "creamSite"
        ceName = "https://cream-1-fzk.gridka.de:8443/ce-cream/services/CREAM2  pbs cmsXS"
        # ceName = "127.0.0.1"
        locationAction = self.daoFactory(classname="Locations.New")
        pendingSlots = self.daoFactory(classname="Locations.SetPendingSlots")
        locationAction.execute(siteName=siteName, seName=siteName, ceName=ceName)
        pendingSlots.execute(siteName=siteName, pendingSlots=1000)

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName=siteName, seName=siteName, ceName=ceName)
        resourceControl.insertThreshold(siteName=siteName, taskType="Processing", maxSlots=10000)

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=siteName,
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        if os.path.exists("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree(self.testDir, "CacheDir")

        return
Exemple #25
    def testE_WhiteListBlackList(self):
        """
        _WhiteListBlackList_

        Test the whitelist/blacklist implementation
        Trust the JobCreator to set this correctly in the job
        """
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        workloadName = "basicWorkload"
        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            bl=["T2_US_Florida", "T2_TW_Taiwan", "T1_CH_CERN"],
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        if os.path.isdir("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree("%s" % self.testDir, os.path.join(os.getcwd(), "CacheDir"))

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs * nSubs)

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at UCSD
        submitFile = None
        for fileName in os.listdir(config.JobSubmitter.submitDir):
            if re.search("submit", fileName):
                submitFile = fileName
        self.assertIsNotNone(submitFile)
        # submitFile = os.listdir(config.JobSubmitter.submitDir)[0]
        self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD")

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        # Run again and test the whiteList
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            wl=["T2_US_UCSD"],
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        if os.path.isdir("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree("%s" % self.testDir, os.path.join(os.getcwd(), "CacheDir"))

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs * nSubs)

        # You'll have jobs from the previous run still in the database
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        # All jobs should be at UCSD
        submitFile = None
        for fileName in os.listdir(config.JobSubmitter.submitDir):
            if re.search("submit", fileName):
                submitFile = fileName
        self.assertIsNotNone(submitFile)
        self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD", noIndex=True)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        # Run again with an invalid whitelist
        # NOTE: After this point, the original two sets of jobs will be executing
        # The rest of the jobs should move to submitFailed
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            wl=["T2_US_Namibia"],
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        # Check to make sure we have running jobs
        # nRunning = getCondorRunningJobs(self.user)
        # self.assertEqual(nRunning, 0)

        # Jobs should be gone
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state="SubmitFailed", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        # Run again with all sites blacklisted
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            bl=self.sites,
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        # Jobs should be gone
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state="SubmitFailed", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        del jobSubmitter
        return
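The three scenarios above reduce to one rule: a job's candidate sites are its whitelist (when non-empty), otherwise all known sites, minus its blacklist; an empty result sends the job to SubmitFailed. A standalone sketch of that selection logic, not the JobSubmitter's actual code:

def possibleSites(allSites, whiteList=None, blackList=None):
    """Sites a job may run at, given optional white/black lists."""
    candidates = set(whiteList) if whiteList else set(allSites)
    return (candidates - set(blackList or [])) & set(allSites)

# With the lists used above (sites standing in for self.sites):
sites = ["T2_US_UCSD", "T2_US_Florida", "T2_TW_Taiwan", "T1_CH_CERN"]
print(possibleSites(sites, blackList=["T2_US_Florida", "T2_TW_Taiwan", "T1_CH_CERN"]))
print(possibleSites(sites, whiteList=["T2_US_UCSD"]))     # only UCSD
print(possibleSites(sites, whiteList=["T2_US_Namibia"]))  # empty -> SubmitFailed
print(possibleSites(sites, blackList=sites))              # empty -> SubmitFailed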
Exemple #26
    def testG_IndexErrorTest(self):
        """
        _IndexErrorTest_

        Check that you get proper indexes for the jobPackages
        if you have more jobs than you normally run at once.
        """
        workloadName = "basicWorkload"

        myThread = threading.currentThread()

        workload = self.createTestWorkload()

        config = self.getConfig()
        config.JobSubmitter.jobsPerWorker = 1
        config.JobSubmitter.collectionSize = 1

        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site="se.T2_US_UCSD",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        if os.path.exists("CacheDir"):
            shutil.rmtree("CacheDir")
        shutil.copytree(self.testDir, "CacheDir")

        # Check that jobs are in the right state
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check on the JDL
        submitFile = None
        for fileName in os.listdir(config.JobSubmitter.submitDir):
            if re.search("submit", fileName):
                submitFile = fileName
        self.assertIsNotNone(submitFile)
        self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD", indexFlag=True)

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs * nSubs)

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        del jobSubmitter
        return
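With jobsPerWorker and collectionSize both set to 1, each of the ten jobs must land in its own job package, which is exactly what the JDL index check verifies. The underlying arithmetic is just integer division; a toy illustration (not the component's code):

def packageIndex(jobNumber, collectionSize):
    """(package, slot) for the n-th job when a package holds collectionSize jobs."""
    return jobNumber // collectionSize, jobNumber % collectionSize

print([packageIndex(n, 1)[0] for n in range(10)])    # ten packages: [0, 1, ..., 9]
print([packageIndex(n, 200)[0] for n in range(10)])  # one package:  [0, 0, ..., 0]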
Exemple #27
    def testF_WMSMode(self):
        """
        _WMSMode_

        Try running things in WMS Mode.
        """

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'
        config.BossAir.submitWMSMode = True

        baAPI = BossAirAPI(config=config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        dummycacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site=None)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        baAPI.track()
        idleJobs = baAPI._loadByStatus(status='Idle')
        sn = "T2_US_UCSD"

        # Test that the site info has been updated. Make sure T2_US_UCSD is not
        # in the site list in BossAir_t.py
        baAPI.updateSiteInformation(idleJobs, sn, True)

        # Now kill 'em manually
        #        command = ['condor_rm', self.user]
        #        pipe = Popen(command, stdout = PIPE, stderr = PIPE, shell = False)
        #        pipe.communicate()

        del jobSubmitter

        return
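getCondorRunningJobs guards most of the condor-based examples on this page. A rough standalone equivalent, assuming a local schedd and the classic condor_q -format option (it counts all of the user's queued jobs, not only running ones):

from subprocess import PIPE, Popen

def countCondorJobs(user):
    """Count condor jobs owned by `user` by listing one ClusterId per line."""
    command = ["condor_q", user, "-format", "%s\n", "ClusterId"]
    stdout, _ = Popen(command, stdout=PIPE, stderr=PIPE, shell=False).communicate()
    return len([line for line in stdout.splitlines() if line.strip()])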
Exemple #28
    def testMemoryProfile(self):
        """
        _testMemoryProfile_

        Creates 20k jobs and keeps refreshing the cache and submitting
        them between the component cycles.

        Example using the memory_profiler library; unfortunately the source
        code has to be updated with decorators.
        NOTE: never run it on Jenkins
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        # myResourceControl = ResourceControl(config)

        nSubs = 20
        nJobs = 100

        sites = ['T2_US_Florida', 'T2_RU_INR', 'T3_CO_Uniandes', 'T1_US_FNAL']
        allSites = CRIC().PSNtoPNNMap('*')

        for site in allSites:
            self.setResourceThresholds(site, pendingSlots=20000, runningSlots=999999, tasks=['Processing', 'Merge'],
                                       Processing={'pendingSlots': 10000, 'runningSlots': 999999},
                                       Merge={'pendingSlots': 10000, 'runningSlots': 999999, 'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)

        # Actually run it
        jobSubmitter.algorithm()  # cycle 1

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T2_US_Florida', 'Draining')
        jobSubmitter.algorithm()  # cycle 2

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T2_RU_INR', 'Draining')
        jobSubmitter.algorithm()  # cycle 3

        self.createJobGroups(nSubs=nSubs, nJobs=nJobs, wfPrio=10,
                             task=workload.getTask("ReReco"),
                             workloadSpec=self.workloadSpecPath,
                             site=[x for x in sites], changeState=changeState)
        # myResourceControl.changeSiteState('T3_CO_Uniandes', 'Draining')
        jobSubmitter.algorithm()  # cycle 4

        # myResourceControl.changeSiteState('T2_RU_INR', 'Normal')
        jobSubmitter.algorithm()  # cycle 5

        # myResourceControl.changeSiteState('T2_US_Florida', 'Normal')
        jobSubmitter.algorithm()  # cycle 6

        # myResourceControl.changeSiteState('T2_RU_INR', 'Normal')
        jobSubmitter.algorithm()  # cycle 7

        # myResourceControl.changeSiteState('T3_CO_Uniandes', 'Normal')
        jobSubmitter.algorithm()  # cycle 8
        jobSubmitter.algorithm()  # cycle 9, nothing to submit

        return
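As the docstring says, memory_profiler only reports on functions that carry its decorator, so the source under test has to be edited. A minimal sketch of that instrumentation (assuming the memory_profiler package is installed; wrapping the poller cycle is one convenient place to hang the decorator):

from memory_profiler import profile

@profile
def runOneCycle(jobSubmitter):
    """One JobSubmitterPoller cycle, reported line by line by memory_profiler."""
    jobSubmitter.algorithm()

# runOneCycle(jobSubmitter) then prints per-line memory usage to stdout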
Exemple #29
    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(
            site,
            pendingSlots=10,
            runningSlots=-1,
            tasks=["Processing", "Merge"],
            Processing={"pendingSlots": 50, "runningSlots": -1},
            Merge={"pendingSlots": 10, "runningSlots": -1, "priority": 5},
        )

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
            name="OldestWorkflow",
        )
        jobGroupList.extend(
            self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                site=site,
                taskType="Merge",
            )
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state="Executing", jobType="Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), 0)

        # Create a newer processing workflow, and afterwards some new jobs for an old workflow

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site=site,
            name="NewestWorkflow",
        )

        jobGroupList.extend(
            self.createJobGroups(
                nSubs=nSubs,
                nJobs=nJobs,
                task=workload.getTask("ReReco"),
                workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
                site=site,
                name="OldestWorkflow",
            )
        )

        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Move pending jobs to running

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for idx in range(2):
            result = getJobsAction.execute(state="Executing")
            binds = []
            for jobId in result:
                binds.append({"id": jobId, "retry_count": 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x["id"] for x in runJobIds], "Running")

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state="Created", jobType="Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state="Executing", jobType="Merge")
            self.assertEqual(len(result), 10)
            result = getJobsAction.execute(state="Created", jobType="Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state="Executing", jobType="Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(classname="Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow["name"], "OldestWorkflow")

        return
Exemple #30
    def testA_BasicTest(self):
        """
        Use the MockPlugin to create a simple test
        Check to see that all the jobs were "submitted",
        don't care about thresholds
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 20
        site = "T2_US_UCSD"

        self.setResourceThresholds(site,
                                   pendingSlots=50,
                                   runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing={
                                       'pendingSlots': 50,
                                       'runningSlots': 100
                                   },
                                   Merge={
                                       'pendingSlots': 50,
                                       'runningSlots': 100
                                   })

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [['T2_US_UCSD']])

        # Run another cycle, it shouldn't submit anything. There isn't anything to submit
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nSubs = 1
        nJobs = 10

        # Submit another 10 jobs
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            taskType="Merge")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Check that the jobs are available for submission and run another cycle
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), nSubs * nJobs)
        jobSubmitter.algorithm()

        # Check that the last 10 jobs were submitted as well.
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), nSubs * nJobs)

        return
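Every example on this page creates job groups and immediately propagates their jobs from 'new' to 'created' before polling; a hypothetical helper would shorten that boilerplate (same createJobGroups and ChangeState as above):

def createAndQueue(testCase, changeState, **kwargs):
    """Create job groups and move their jobs to 'created', ready for the poller."""
    jobGroupList = testCase.createJobGroups(**kwargs)
    for group in jobGroupList:
        changeState.propagate(group.jobs, 'created', 'new')
    return jobGroupList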
Exemple #31
    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(site,
                                   pendingSlots=50,
                                   runningSlots=220,
                                   tasks=['Processing', 'Merge'],
                                   Processing={
                                       'pendingSlots': 45,
                                       'runningSlots': 200
                                   },
                                   Merge={
                                       'pendingSlots': 10,
                                       'runningSlots': 20,
                                       'priority': 5
                                   })

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted, there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            taskType='Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 30 running 0 pending and processing is left at 240 running 0 pending
        # Processing has 110 jobs in queue and Merge 280
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        jobGroupList.extend(
            self.createJobGroups(nSubs=nSubs,
                                 nJobs=nJobs,
                                 task=workload.getTask("ReReco"),
                                 workloadSpec=self.workloadSpecPath,
                                 site=site,
                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for i in range(5):
            result = getJobsAction.execute(state='Executing')
            binds = []
            for jobId in result:
                binds.append({'id': jobId, 'retry_count': 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(result), 240)
        result = getJobsAction.execute(state='Created', jobType='Processing')
        self.assertEqual(len(result), 110)
        result = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(result), 30)
        result = getJobsAction.execute(state='Created', jobType='Merge')
        self.assertEqual(len(result), 280)

        return
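The counts asserted above follow from simple slot accounting: per cycle a task can submit at most the smaller of its own free pending slots and the site's free pending slots, and Merge is served first because of its higher priority. A toy version of that arithmetic, using the thresholds configured in this test (illustrative only):

def submittable(siteFree, taskFree, queued):
    """Jobs one task may submit this cycle given free pending slots."""
    return max(0, min(siteFree, taskFree, queued))

# First cycle: site allows 50 pending, Processing allows 45, 50 jobs queued:
print(submittable(50, 45, 50))          # -> 45, as asserted above
# Merge cycle: 45 already pending leaves 5 site slots, Merge allows 10:
print(submittable(50 - 45, 10, 10))     # -> 5, as asserted above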
Exemple #32
    def testD_PrototypeChain(self):
        """
        _PrototypeChain_

        Prototype the BossAir workflow
        """
        dummymyThread = threading.currentThread()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(
            nRunning, 0,
            "User currently has %i running jobs.  Test will not continue" %
            (nRunning))

        config = self.getConfig()
        config.BossAir.pluginName = 'SimpleCondorPlugin'

        baAPI = BossAirAPI(config=config, insertStates=True)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        dummycacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            site='se.T2_US_UCSD')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        jobTracker = JobTrackerPoller(config=config)
        statusPoller = StatusPoller(config=config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Check WMBS
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        statusPoller.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status='Idle')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Tracker should do nothing
        jobTracker.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Wait for jobs to timeout due to short Pending wait period
        time.sleep(12)

        statusPoller.algorithm()

        newJobs = baAPI._loadByStatus(status='Idle')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status='Timeout', complete='0')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Jobs should be gone
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nSubs * nJobs)

        # Because they timed out, they all should have failed
        jobTracker.algorithm()

        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        result = getJobsAction.execute(state='JobFailed', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
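Condensed, the prototype chain walks each BossAir record through a fixed status progression; the stages below are the ones asserted above (the mapping of pollers to transitions is as exercised in this test):

# submit                  -> BossAir status 'New'
# StatusPoller            -> 'Idle' (condor reports the jobs as idle)
# short pending timeout   -> 'Timeout', record flagged complete
# JobTracker              -> WMBS job ends in 'JobFailed'
BOSSAIR_FLOW = ['New', 'Idle', 'Timeout', 'JobFailed']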
Exemple #33
    def testA_BasicTest(self):
        """
        Use the CondorGlobusPlugin to create a very simple test
        Check to see that all the jobs were submitted
        Parse and test the JDL files
        See what condor says
        """
        workloadName = "basicWorkload"

        myThread = threading.currentThread()

        workload = self.createTestWorkload()

        config = self.getConfig()

        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        cacheDir = os.path.join(self.testDir, "CacheDir")

        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site="se.T2_US_UCSD",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        result = getJobsAction.execute(state="Created", jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state="Executing", jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [["T2_US_UCSD"]])

        # Check on the JDL
        submitFile = None
        for fileName in os.listdir(config.JobSubmitter.submitDir):
            if re.search("submit", fileName):
                submitFile = fileName
        self.assertIsNotNone(submitFile)
        self.checkJDL(config=config, cacheDir=cacheDir, submitFile=submitFile, site="T2_US_UCSD")

        # if os.path.exists('CacheDir'):
        #    shutil.rmtree('CacheDir')
        # shutil.copytree(self.testDir, 'CacheDir')

        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs * nSubs)

        # This should do nothing
        jobGroupList = self.createJobGroups(
            nSubs=nSubs,
            nJobs=nJobs,
            task=workload.getTask("ReReco"),
            workloadSpec=os.path.join(self.testDir, "workloadTest", workloadName),
            site="se.T2_US_UCSD",
        )
        for group in jobGroupList:
            changeState.propagate(group.jobs, "created", "new")
        jobSubmitter.algorithm()

        # Now clean-up
        command = ["condor_rm", self.user]
        pipe = Popen(command, stdout=PIPE, stderr=PIPE, shell=False)
        pipe.communicate()

        del jobSubmitter

        return
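The submit-file scan above keeps whichever match os.listdir happens to return last; when several cycles have written files, explicitly taking the newest match is more robust. A hedged sketch:

import os
import re

def latestSubmitFile(submitDir):
    """Most recently modified file in submitDir whose name contains 'submit'."""
    matches = [os.path.join(submitDir, name) for name in os.listdir(submitDir)
               if re.search("submit", name)]
    return max(matches, key=os.path.getmtime) if matches else None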
Exemple #34
    def testA_BasicTest(self):
        """
        Use the MockPlugin to create a simple test
        Check to see that all the jobs were "submitted",
        don't care about thresholds
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 20
        site = 'T2_US_UCSD'

        self.setResourceThresholds(site, pendingSlots = 50, runningSlots = 100, tasks = ['Processing', 'Merge'],
                                   Processing = {'pendingSlots' : 50, 'runningSlots' : 100},
                                   Merge = {'pendingSlots' : 50, 'runningSlots' : 100})

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter = JobSubmitterPoller(config = config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid = jobId)
            self.assertEqual(loc, [['T2_US_UCSD']])

        # Run another cycle, it shouldn't submit anything. There isn't anything to submit
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nSubs = 1
        nJobs = 10

        # Submit another 10 jobs
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % site,
                                            taskType = "Merge")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Check that the jobs are available for submission and run another cycle
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), nSubs * nJobs)
        jobSubmitter.algorithm()

        # Check that the last 10 jobs were submitted as well.
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Exemple #35
    def testD_PrototypeChain(self):
        """
        _PrototypeChain_

        Prototype the BossAir workflow
        """
        myThread = threading.currentThread()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))


        config = self.getConfig()
        config.BossAir.pluginName = 'CondorPlugin'

        baAPI  = BossAirAPI(config = config)

        workload = self.createTestWorkload()

        workloadName = "basicWorkload"

        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10

        cacheDir = os.path.join(self.testDir, 'CacheDir')

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.T2_US_UCSD')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')


        jobSubmitter = JobSubmitterPoller(config = config)
        jobTracker   = JobTrackerPoller(config = config)
        statusPoller = StatusPoller(config = config)

        jobSubmitter.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Check WMBS
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        statusPoller.algorithm()

        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nSubs * nJobs)

        newJobs = baAPI._loadByStatus(status = 'New')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status = 'Idle')
        self.assertEqual(len(newJobs), nSubs * nJobs)


        # Tracker should do nothing
        jobTracker.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)


        # Wait for jobs to timeout due to short Pending wait period
        time.sleep(12)


        statusPoller.algorithm()

        newJobs = baAPI._loadByStatus(status = 'Idle')
        self.assertEqual(len(newJobs), 0)

        newJobs = baAPI._loadByStatus(status = 'Timeout', complete = '0')
        self.assertEqual(len(newJobs), nSubs * nJobs)

        # Jobs should be gone
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)


        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nSubs * nJobs)


        # Because they timed out, they all should have failed
        jobTracker.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        result = getJobsAction.execute(state = 'JobFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return
Exemple #36
    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 50, runningSlots = 200, tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 45, 'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10, 'runningSlots' : 20, 'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state, 
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid = jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted, there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 25 running 0 pending and processing is left at 215 running 0 pending
        # Processing has 135 jobs in queue and Merge 285
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir, 'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        for _ in range(5):
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(result), 215)
        result = getJobsAction.execute(state = 'Created', jobType = 'Processing')
        self.assertEqual(len(result), 135)
        result = getJobsAction.execute(state = 'Executing', jobType = 'Merge')
        self.assertEqual(len(result), 25)
        result = getJobsAction.execute(state = 'Created', jobType = 'Merge')
        self.assertEqual(len(result), 285)

        return
Exemple #37
    def testA_StraightThrough(self):
        """
        _StraightThrough_

        Just run everything straight through without any variations
        """
        # Do pre-submit job check
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config   = self.getConfig()


        name         = 'WMAgent_Test1'
        site         = self.sites[0]
        nSubs        = 5
        nFiles       = 10
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        # Create a collection of files
        self.createFileCollection(name = name, nSubs = nSubs,
                                  nFiles = nFiles,
                                  workflowURL = workloadPath,
                                  site = site)

        ############################################################
        # Test the JobCreator


        config.Agent.componentName = 'JobCreator'
        testJobCreator = JobCreatorPoller(config = config)

        testJobCreator.algorithm()
        time.sleep(5)


        # Did all jobs get created?
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs*nFiles)


        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)


        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        self.assertTrue('job_1' in os.listdir(groupDirectory))
        jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        f = open(jobFile, 'rb')
        job = cPickle.load(f)
        f.close()


        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')


        ###############################################################
        # Now test the JobSubmitter

        config.Agent.componentName = 'JobSubmitter'
        testJobSubmitter = JobSubmitterPoller(config = config)


        testJobSubmitter.algorithm()


        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobID in result:
            loc = getLocationAction.execute(jobid = jobID)
            self.assertEqual(loc, [[site]])


        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, nFiles * nSubs)


        #################################################################
        # Now the JobTracker


        config.Agent.componentName = 'JobTracker'
        testJobTracker = JobTrackerPoller(config = config)
        testJobTracker.setup()

        testJobTracker.algorithm()

        # Running the algo without removing the jobs should do nothing
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


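        # Remove the jobs from the condor queue to simulate their completion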
        condorRM()
        time.sleep(1)

        # All jobs gone?
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0)


        testJobTracker.algorithm()
        time.sleep(5)

        # Now that the condor jobs are gone, the tracker should mark them complete
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        #################################################################
        # Now the JobAccountant

        # First load all jobs that have reached the complete state


        self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
        completeJobs       = self.getFWJRAction.execute(state = "complete")


        # Create reports for all jobs
        self.createReports(jobs = completeJobs, retryCount = 0)


        config.Agent.componentName = 'JobAccountant'
        testJobAccountant = JobAccountantPoller(config = config)
        testJobAccountant.setup()


        # The accountant should process the reports for every complete job
        testJobAccountant.algorithm()


        # All the jobs should be done now
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        #######################################################################
        # Now the JobArchiver


        config.Agent.componentName = 'JobArchiver'
        testJobArchiver = JobArchiverPoller(config = config)


        testJobArchiver.algorithm()

        # All the jobs should be cleaned up
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        logDir = os.path.join(self.testDir, 'logs')

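        # The archiver should have deleted each FWJR and tarred the job's logs
        # into a JobCluster_<N> directory under the log area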
        for job in completeJobs:
            self.assertFalse(os.path.exists(job['fwjr_path']))
            jobFolder = 'JobCluster_%i' \
                    % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
            jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' %(job['id']))
            self.assertTrue(os.path.isfile(jobPath))
        self.assertTrue(os.path.getsize(jobPath) > 0)


        ###########################################################################
        # Now the TaskArchiver


        config.Agent.componentName = 'TaskArchiver'
        testTaskArchiver = TaskArchiverPoller(config = config)


        testTaskArchiver.algorithm()


        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), 0)


        for jdict in completeJobs:
            job = Job(id = jdict['id'])
            self.assertFalse(job.exists())


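        # Keep a copy of the test working area around for post-mortem inspection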
        if os.path.isdir('testDir'):
            shutil.rmtree('testDir')
        shutil.copytree(self.testDir, os.path.join(os.getcwd(), 'testDir'))


        return
Example #38
    def testD_WhiteListBlackList(self):
        """
        _testD_WhiteListBlackList_

        Test the whitelist/blacklist implementation,
        trusting the JobCreator to have placed the site lists in the jobs correctly
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        sites = [
            'T2_US_Florida', 'T2_TW_Taiwan', 'T2_CH_CERN', 'T3_CO_Uniandes'
        ]

        for site in sites:
            self.setResourceThresholds(site,
                                       pendingSlots=1000,
                                       runningSlots=-1,
                                       tasks=['Processing', 'Merge'],
                                       Processing={
                                           'pendingSlots': 5000,
                                           'runningSlots': -1
                                       },
                                       Merge={
                                           'pendingSlots': 1000,
                                           'runningSlots': -1,
                                           'priority': 5
                                       })

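        # First run: blacklist every site except T3_CO_Uniandes, so all jobs
        # should land there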
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            site='se.%s' % sites[-1],
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            bl=sites[:-1])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)

        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at T3_CO_Uniandes
        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid': x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertEqual(loc, 'T3_CO_Uniandes')

        # Run again and test the whiteList
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            site='se.%s' % 'T2_CH_CERN',
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            wl=['T2_CH_CERN'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Run it
        jobSubmitter.algorithm()

        # You'll have jobs from the previous run still in the database
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        # The first batch stays at Uniandes; the new jobs should all be at CERN
        locationDict = getLocationAction.execute([{'jobid': x} for x in result])
        for entry in locationDict[nSubs * nJobs:]:
            loc = entry['site_name']
            self.assertEqual(loc, 'T2_CH_CERN')

        # Run again with an invalid whitelist
        # After this point, the original two sets of jobs will be executing
        # The rest of the jobs should move to submitFailed
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            site='se.%s' % 'T2_CH_CERN',
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            wl=['T2_US_Namibia'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # The jobs with the invalid whitelist should have moved to SubmitFailed
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state='SubmitFailed',
                                       jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Run again with all sites blacklisted
        jobGroupList = self.createJobGroups(nSubs=nSubs,
                                            nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            site=['se.%s' % x for x in sites],
                                            workloadSpec=os.path.join(
                                                self.testDir, 'workloadTest',
                                                workloadName),
                                            bl=sites)

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state='SubmitFailed',
                                       jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        return