Example #1
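Timing test: build a giant job set (nSubs=10, nJobs=1000, nFiles=10), run TaskArchiverPoller.algorithm() once, verify that all WMBS jobs, subscriptions, job groups, and file details have been cleaned out, and log how long the poller took.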
    def testD_Timing(self):
        """
        _Timing_

        Time how long the TaskArchiver takes to process a large job set.
        """
        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds", (stopTime - startTime))
Example #2
File: WMAgent_t.py Project: ticoann/WMCore
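End-to-end agent test: a file collection is run straight through the component chain (JobCreator, JobSubmitter, JobTracker, JobAccountant, JobArchiver, and finally TaskArchiver), with assertions on the expected job states (Created, Executing, Complete, Success, Cleanout) and Condor job counts after each component runs.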
    def testA_StraightThrough(self):
        """
        _StraightThrough_

        Just run everything straight through without any variations
        """
        # Do pre-submit job check
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config   = self.getConfig()


        name         = 'WMAgent_Test1'
        site         = self.sites[0]
        nSubs        = 5
        nFiles       = 10
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        # Create a collection of files
        self.createFileCollection(name = name, nSubs = nSubs,
                                  nFiles = nFiles,
                                  workflowURL = workloadPath,
                                  site = site)



        ############################################################
        # Test the JobCreator


        config.Agent.componentName = 'JobCreator'
        testJobCreator = JobCreatorPoller(config = config)

        testJobCreator.algorithm()
        time.sleep(5)


        # Did all jobs get created?
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs*nFiles)


        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)


        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        self.assertTrue('job_1' in os.listdir(groupDirectory))
        jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        with open(jobFile, 'rb') as f:
            job = cPickle.load(f)


        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')










        ###############################################################
        # Now test the JobSubmitter

        config.Agent.componentName = 'JobSubmitter'
        testJobSubmitter = JobSubmitterPoller(config = config)


        testJobSubmitter.algorithm()


        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobID in result:
            loc = getLocationAction.execute(jobid = jobID)
            self.assertEqual(loc, [[site]])


        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, nFiles * nSubs)


        #################################################################
        # Now the JobTracker


        config.Agent.componentName = 'JobTracker'
        testJobTracker = JobTrackerPoller(config = config)
        testJobTracker.setup()

        testJobTracker.algorithm()

        # Running the algo without removing the jobs should do nothing
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        condorRM()
        time.sleep(1)

        # All jobs gone?
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0)


        testJobTracker.algorithm()
        time.sleep(5)

        # Once the jobs are gone from condor, the tracker should mark them Complete
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        #################################################################
        # Now the JobAccountant

        # First you need to load all jobs


        self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
        completeJobs       = self.getFWJRAction.execute(state = "complete")


        # Create reports for all jobs
        self.createReports(jobs = completeJobs, retryCount = 0)


        config.Agent.componentName = 'JobAccountant'
        testJobAccountant = JobAccountantPoller(config = config)
        testJobAccountant.setup()


        # It should do something with the jobs
        testJobAccountant.algorithm()


        # All the jobs should be done now
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        #######################################################################
        # Now the JobArchiver


        config.Agent.componentName = 'JobArchiver'
        testJobArchiver = JobArchiverPoller(config = config)


        testJobArchiver.algorithm()

        # All the jobs should be cleaned up
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        logDir = os.path.join(self.testDir, 'logs')

        for job in completeJobs:
            self.assertFalse(os.path.exists(job['fwjr_path']))
            jobFolder = 'JobCluster_%i' \
                    % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
            jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' %(job['id']))
            self.assertTrue(os.path.isfile(jobPath))
            self.assertTrue(os.path.getsize(jobPath) > 0)


        ###########################################################################
        # Now the TaskArchiver


        config.Agent.componentName = 'TaskArchiver'
        testTaskArchiver = TaskArchiverPoller(config = config)


        testTaskArchiver.algorithm()


        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), 0)


        for jdict in completeJobs:
            job = Job(id = jdict['id'])
            self.assertFalse(job.exists())


        if os.path.isdir('testDir'):
            shutil.rmtree('testDir')
        shutil.copytree(self.testDir, os.path.join(os.getcwd(), 'testDir'))

        return
Example #3
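Builds a workload summary from a multicore job report: after TaskArchiverPoller.algorithm() runs, the WMBS tables must be empty and the summary document in Couch must hold the expected multicore averages (minMergeTime, numberOfMerges, averageProcessTime).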
    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False,
                                               multicore=True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0, "No job should have survived")
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        workloadSummary = workdatabase.document(id="TestWorkload")

        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['minMergeTime']['average'],
            5.7624950408900002,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['numberOfMerges']['average'],
            3.0,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['averageProcessTime']['average'],
            29.369966666700002,
            places=2)
        return
Example #4
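Runs the archiver over a job group with failed FWJRs, then checks the error section of the workload summary: the failure time, error code 99999 with its run and lumi information, and the failures-by-site and errors-by-site histograms.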
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'],
            500)
        self.assertTrue('99999' in workloadSummary['errors']
                        ['/TestWorkload/ReReco']['cmsRun1'])
        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']
            ['99999']['runs'], {'10': [12312]},
            "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['average']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['stdDev']['Failed Jobs'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Example #5
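Basic function test with two job groups (a ReReco task plus its LogCollect task): after archiving, the WMBS tables, cache directories, and filesets must be gone, and the workload summary must carry the expected ACDC server URL, output dataset, performance histograms, worst offenders, retry data, and log archives.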
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Test that the components can process a simple set of closeouts.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False)

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump",
                              "jobsByWorkflowName",
                              options={
                                  "startkey": [workflowName],
                                  "endkey": [workflowName, {}]
                              })['rows']
        self.assertEqual(len(jobs), 2 * self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        workloadSummary = workdatabase.document(id="TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'],
                         sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(
            sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
                   ['tasks']),
            ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['average'],
            0.89405199999999996,
            places=2)
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['nEvents'], 10)

        # Check standard performance
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['average'],
            17.786300000000001,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['stdDev'],
            0.0,
            places=2)

        # Check worstOffenders
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['worstOffenders'], [{
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 2
            }])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                         {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {
            '/TestWorkload/ReReco/LogCollect':
            [logCollectPFN for _ in range(10)]
        })

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance'][
                '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEqual(
                    workloadSummary['performance']
                    ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                    workloadSummary['performance']['/TestWorkload/ReReco']
                    ['cmsRun1'][x][y],
                    places=2)

        return
Example #6
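A newer variant of the failed-FWJR test: it adds a LogCollect job group, marks the workflow as complete, and runs CleanCouchPoller after the TaskArchiver before validating the error summary and the per-site failure histograms.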
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)
        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobdb.loadView("JobDump",
                       "jobsByWorkflowName",
                       options={
                           "startkey": [workload.name()],
                           "endkey": [workload.name(), {}]
                       })['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={
                            "startkey": [workload.name()],
                            "endkey": [workload.name(), {}]
                        })['rows']

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'],
            500)
        self.assertTrue('99999' in workloadSummary['errors']
                        ['/TestWorkload/ReReco']['cmsRun1'])

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco'][
            'cmsRun1']['99999']['runs']
        self.assertEqual(
            failedRunInfo, {'10': [[12312, 12312]]},
            "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['average']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['stdDev']['Failed Jobs'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return