Esempio n. 1
0
    def atestTaskArchiverPollerAlertsSending_killSubscriptions(self):
        """
        Cause exception (alert-worthy situation) in
        the TaskArchiverPoller killSubscriptions method.
        (only 1 situation out of two tested).

        """
        # NOTE(review): test is disabled twice over - the 'atest' name prefix
        # hides it from the unittest runner, and this bare return skips the
        # body.  Everything below is dead code kept for when it is re-enabled.
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config = config)

        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        # will fail on calling .load() - regardless, the same except block
        numAlerts = 3
        doneList = [{'id': x} for x in range(numAlerts)]
        # final re-raise is currently commented, so don't expect Exception here
        testTaskArchiver.killSubscriptions(doneList)
        # wait for the generated alert to arrive
        # NOTE(review): busy-wait has no timeout - hangs forever if the alerts
        # never arrive; acceptable only while the test stays disabled.
        while len(handler.queue) < numAlerts:
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), numAlerts)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")
        return
Esempio n. 2
0
    def atestTaskArchiverPollerAlertsSending_notifyWorkQueue(self):
        """
        Cause exception (alert-worthy situation) in
        the TaskArchiverPoller notifyWorkQueue method.

        """
        # NOTE(review): test is disabled twice over - the 'atest' name prefix
        # hides it from the unittest runner, and this bare return skips the
        # body.  Everything below is dead code kept for when it is re-enabled.
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config=config)

        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        # prepare input such input which will go until where it expectantly
        # fails and shall send an alert
        # this will currently fail in the TaskArchiverPoller killSubscriptions
        # on trying to access .load() method which items of below don't have.
        # should anything change in the TaskArchiverPoller without modifying this
        # test accordingly, it may be failing ...
        print "failures 'AttributeError: 'dict' object has no attribute 'load' expected ..."
        subList = [{'id': 1}, {'id': 2}, {'id': 3}]
        testTaskArchiver.notifyWorkQueue(subList)
        # wait for the generated alert to arrive
        # NOTE(review): busy-wait has no timeout - hangs forever if the alerts
        # never arrive; acceptable only while the test stays disabled.
        while len(handler.queue) < len(subList):
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent (expect this many failures)
        self.assertEqual(len(handler.queue), len(subList))
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")
Esempio n. 3
0
    def atestD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        # NOTE(review): performance probe, not a correctness test - disabled
        # via the 'atest' prefix and the unconditional return below.
        return

        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        # 10 subscriptions x 1000 jobs x 10 files: deliberately large workload
        jobList = self.createGiantJobSet(name=name, config=config, nSubs=10, nJobs=1000, nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        # after archiving, every WMBS table touched by the poller must be empty
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        # the fileset created for the job set must be gone as well
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))
Esempio n. 4
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR.

        Runs the TaskArchiverPoller over a job group whose jobs carry a
        failure report, then verifies that the workload summary document
        written to couch records the failure time, the error code and the
        per-site failure/error histograms.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # prime the couch views before the poller runs (results discarded)
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        dbname       = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id = workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        # 'in' instead of dict.has_key(), which is deprecated and gone in py3
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])

        # lumi order within a run is not guaranteed; deduplicate before comparing
        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
        for key, value in failedRunInfo.items():
            failedRunInfo[key] = list(set(value))
        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(failedRunInfo, {'10' : [12312]},
                         "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Esempio n. 5
0
    def atestB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR.

        Runs the TaskArchiverPoller over a job group whose jobs carry a
        failure report and checks the failure details recorded in the
        workload summary couch document.
        NOTE(review): disabled via the 'atest' name prefix.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=True
        )

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        workloadSummary = workdatabase.document(id="TestWorkload")

        self.assertEqual(workloadSummary["/TestWorkload/ReReco"]["failureTime"], 500)
        # 'in' instead of dict.has_key(), which is deprecated and gone in py3
        self.assertTrue("99999" in workloadSummary["/TestWorkload/ReReco"]["cmsRun1"])
        return
Esempio n. 6
0
    def atestTaskArchiverPollerAlertsSending_killSubscriptions(self):
        """
        Cause exception (alert-worthy situation) in
        the TaskArchiverPoller killSubscriptions method.
        (only 1 situation out of two tested).

        """
        # NOTE(review): test is disabled twice over - the 'atest' name prefix
        # hides it from the unittest runner, and this bare return skips the
        # body.  Everything below is dead code kept for when it is re-enabled.
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config=config)

        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        # will fail on calling .load() - regardless, the same except block
        numAlerts = 3
        doneList = [{'id': x} for x in range(numAlerts)]
        # final re-raise is currently commented, so don't expect Exception here
        testTaskArchiver.killSubscriptions(doneList)
        # wait for the generated alert to arrive
        # NOTE(review): busy-wait has no timeout - hangs forever if the alerts
        # never arrive; acceptable only while the test stays disabled.
        while len(handler.queue) < numAlerts:
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), numAlerts)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")
        return
Esempio n. 7
0
    def atestTaskArchiverPollerAlertsSending_notifyWorkQueue(self):
        """
        Cause exception (alert-worthy situation) in
        the TaskArchiverPoller notifyWorkQueue method.

        """
        # NOTE(review): test is disabled twice over - the 'atest' name prefix
        # hides it from the unittest runner, and this bare return skips the
        # body.  Everything below is dead code kept for when it is re-enabled.
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config = config)

        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        # prepare input such input which will go until where it expectantly
        # fails and shall send an alert
        # this will currently fail in the TaskArchiverPoller killSubscriptions
        # on trying to access .load() method which items of below don't have.
        # should anything change in the TaskArchiverPoller without modifying this
        # test accordingly, it may be failing ...
        print "failures 'AttributeError: 'dict' object has no attribute 'load' expected ..."
        subList = [{'id': 1}, {'id': 2}, {'id': 3}]
        testTaskArchiver.notifyWorkQueue(subList)
        # wait for the generated alert to arrive
        # NOTE(review): busy-wait has no timeout - hangs forever if the alerts
        # never arrive; acceptable only while the test stays disabled.
        while len(handler.queue) < len(subList):
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent (expect this many failures)
        self.assertEqual(len(handler.queue), len(subList))
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")
Esempio n. 8
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR.

        Runs the TaskArchiverPoller over a job group whose jobs carry a
        failure report, then verifies that the workload summary document
        written to couch records the failure time, the error code, the
        failed-run lumi information and the per-site histograms.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # prime the couch views before the poller runs (results discarded)
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        dbname       = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id = workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        # 'in' instead of dict.has_key(), which is deprecated and gone in py3
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])
        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs'], {'10' : [12312]},
                         "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Esempio n. 9
0
    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report.

        Runs the TaskArchiverPoller over a successful multicore job group,
        checks that all WMBS rows are cleaned up, and verifies the averaged
        performance numbers in the resulting workload summary document.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=False, multicore=True
        )

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        # the archiver must have removed every WMBS row it processed
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0, "No job should have survived")
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        workloadSummary = workdatabase.document(id="TestWorkload")

        # assertAlmostEqual instead of the deprecated assertAlmostEquals alias;
        # expected values come from the canned multicore job report
        self.assertAlmostEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["minMergeTime"]["average"],
            5.7624950408900002,
            places=2,
        )
        self.assertAlmostEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["numberOfMerges"]["average"],
            3.0,
            places=2,
        )
        self.assertAlmostEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["averageProcessTime"]["average"],
            29.369966666700002,
            places=2,
        )
        return
Esempio n. 10
0
    def atestC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        # NOTE(review): profiling aid, not a correctness test - disabled via
        # the 'atest' prefix and the unconditional return below.
        return

        import cProfile, pstats

        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()

        # deliberately large workload: 10 subs x 1000 jobs x 10 files
        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        # profile one archiver pass and dump the stats to a file
        cProfile.runctx("testTaskArchiver.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        # print the hot spots sorted by cumulative time
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
Esempio n. 11
0
    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report.

        Runs the TaskArchiverPoller over a successful multicore job group
        and verifies the averaged performance numbers in the resulting
        workload summary document.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False,
                                               multicore = True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        dbname       = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        # connecting ensures the jobs/fwjrs databases exist before the poller runs
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        workloadSummary = workdatabase.document(id = "TestWorkload")

        # compare float averages approximately - exact equality on floats is
        # fragile; matches the sibling multicore test's places=2 tolerance
        self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['minMergeTime']['average'],
                               5.7624950408900002, places = 2)
        self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['numberOfMerges']['average'],
                               3.0, places = 2)
        self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['averageProcessTime']['average'],
                               29.369966666700002, places = 2)
        return
Esempio n. 12
0
    def atestD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        # NOTE(review): performance probe, not a correctness test - disabled
        # via the 'atest' prefix and the unconditional return below.
        return

        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        # 10 subscriptions x 1000 jobs x 10 files: deliberately large workload
        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        # after archiving, every WMBS table touched by the poller must be empty
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        # the fileset created for the job set must be gone as well
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))
Esempio n. 13
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR.

        Runs the TaskArchiverPoller over a job group whose jobs carry a
        failure report and checks the failure time, error code and failed
        lumi information in the workload summary couch document.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        dbname       = getattr(config.JobStateMachine, "couchDBName")
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id = workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        # 'in' instead of dict.has_key(), which is deprecated and gone in py3
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])
        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs'], {'10' : [12312]},
                         "Wrong lumi information in the summary for failed jobs")
        return
Esempio n. 14
0
    def preInitialization(self):
        """
        Register the component's worker threads.

        Adds a TaskArchiverPoller and a CleanCouchPoller to the current
        thread's worker manager, each with its poll interval taken from
        the TaskArchiver section of the component configuration.
        """
        # use logging like the rest of this method instead of a bare print
        logging.info("TaskArchiver.preInitialization")

        # Add event loop to worker manager
        myThread = threading.currentThread()

        pollInterval = self.config.TaskArchiver.pollInterval
        logging.info("Setting task archiver poll interval to %s seconds" %
                     pollInterval)
        myThread.workerThreadManager.addWorker(TaskArchiverPoller(self.config),
                                               pollInterval)

        couchInterval = self.config.TaskArchiver.cleanCouchInterval
        logging.info(
            "Setting poll interval for cleanup old couch doc to %s seconds" %
            couchInterval)
        myThread.workerThreadManager.addWorker(CleanCouchPoller(self.config),
                                               couchInterval)

        return
Esempio n. 15
0
    def testA_StraightThrough(self):
        """
        _StraightThrough_

        Just run everything straight through without any variations
        """
        # NOTE(review): end-to-end agent pipeline - JobCreator, JobSubmitter,
        # JobTracker, JobAccountant, JobArchiver, TaskArchiver run in order,
        # asserting the expected job-state counts after each stage.  Needs a
        # working condor pool (getCondorRunningJobs / condorRM).
        # Do pre-submit job check
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0, "User currently has %i running jobs.  Test will not continue" % (nRunning))

        myThread = threading.currentThread()
        workload = self.createTestWorkload()
        config   = self.getConfig()


        name         = 'WMAgent_Test1'
        site         = self.sites[0]
        nSubs        = 5
        nFiles       = 10
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        # Create a collection of files
        self.createFileCollection(name = name, nSubs = nSubs,
                                  nFiles = nFiles,
                                  workflowURL = workloadPath,
                                  site = site)



        ############################################################
        # Test the JobCreator


        config.Agent.componentName = 'JobCreator'
        testJobCreator = JobCreatorPoller(config = config)

        testJobCreator.algorithm()
        time.sleep(5)


        # Did all jobs get created?
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs*nFiles)


        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)


        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'TestWorkload', 'ReReco')
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        self.assertTrue('job_1' in os.listdir(groupDirectory))
        jobFile = os.path.join(groupDirectory, 'job_1', 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        f = open(jobFile, 'r')
        job = cPickle.load(f)
        f.close()


        # spot-check the pickled job's workflow, input and sandbox
        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')










        ###############################################################
        # Now test the JobSubmitter

        config.Agent.componentName = 'JobSubmitter'
        testJobSubmitter = JobSubmitterPoller(config = config)


        testJobSubmitter.algorithm()


        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for id in result:
            loc = getLocationAction.execute(jobid = id)
            self.assertEqual(loc, [[site]])


        # Check to make sure we have running jobs
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, nFiles * nSubs)


        #################################################################
        # Now the JobTracker


        config.Agent.componentName = 'JobTracker'
        testJobTracker = JobTrackerPoller(config = config)
        testJobTracker.setup()

        testJobTracker.algorithm()

        # Running the algo without removing the jobs should do nothing
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        # kill the condor jobs out from under the tracker
        condorRM()
        time.sleep(1)

        # All jobs gone?
        nRunning = getCondorRunningJobs()
        self.assertEqual(nRunning, 0)


        testJobTracker.algorithm()
        time.sleep(5)

        # the tracker should now have moved every job Executing -> Complete
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)




        #################################################################
        # Now the JobAccountant

        # First you need to load all jobs


        self.getFWJRAction = self.daoFactory(classname = "Jobs.GetFWJRByState")
        completeJobs       = self.getFWJRAction.execute(state = "complete")


        # Create reports for all jobs
        self.createReports(jobs = completeJobs, retryCount = 0)






        config.Agent.componentName = 'JobAccountant'
        testJobAccountant = JobAccountantPoller(config = config)
        testJobAccountant.setup()


        # It should do something with the jobs
        testJobAccountant.algorithm()


        # All the jobs should be done now
        result = getJobsAction.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)



        #######################################################################
        # Now the JobArchiver


        config.Agent.componentName = 'JobArchiver'
        testJobArchiver = JobArchiverPoller(config = config)


        testJobArchiver.algorithm()

        # All the jobs should be cleaned up
        result = getJobsAction.execute(state = 'Success', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nFiles)


        logDir = os.path.join(self.testDir, 'logs')

        # each job's FWJR must be gone and its log tarball archived
        for job in completeJobs:
            self.assertFalse(os.path.exists(job['fwjr_path']))
            jobFolder = 'JobCluster_%i' \
                    % (int(job['id']/config.JobArchiver.numberOfJobsToCluster))
            jobPath = os.path.join(logDir, jobFolder, 'Job_%i.tar' %(job['id']))
            self.assertTrue(os.path.isfile(jobPath))
            self.assertTrue(os.path.getsize(jobPath) > 0)




        ###########################################################################
        # Now the TaskAchiver


        config.Agent.componentName = 'TaskArchiver'
        testTaskArchiver = TaskArchiverPoller(config = config)


        testTaskArchiver.algorithm()


        result = getJobsAction.execute(state = 'Cleanout', jobType = "Processing")
        self.assertEqual(len(result), 0)


        # every job record should have been purged from WMBS
        for jdict in completeJobs:
            job = Job(id = jdict['id'])
            self.assertFalse(job.exists())




        # NOTE(review): snapshot the whole test area into ./testDir for
        # post-mortem inspection; clobbers any previous snapshot.
        if os.path.isdir('testDir'):
            shutil.rmtree('testDir')
        shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))




        return
Esempio n. 16
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        End-to-end closeout check: create two job groups (a ReReco processing
        group and a LogCollect group) for one workload, run TaskArchiverPoller
        followed by CleanCouchPoller, then verify that all WMBS rows and the
        job-cache directories are gone and that the couch workload summary
        carries the expected ACDC, output, performance, retry and log-archive
        information.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False)

        # Create second workload
        # NOTE(review): actually a second job group (the LogCollect task)
        # under the same workload name, sharing the same spec file.
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect",
                                                type = "LogCollect")

        # The job cache directories must exist up front so we can assert
        # below that the archiver removed them.
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        # One subscription per job group exists before archiving.
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname       = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        # Return value intentionally discarded: the call only checks that the
        # FWJR view is reachable before the pollers run.
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']

        self.assertEqual(len(jobs), 2*self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])

        # Mark the workflow complete, then run the archiver followed by the
        # couch cleaner; together they should drain WMBS and couch.
        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        # Every WMBS table touched by the workflow should now be empty.
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)



        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']),
                        ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                                0.89405199999999996, places = 2)
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                         10)

        # Check standard performance
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'], 17.786300000000001,
                                places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'], 0.0,
                                places = 2)

        # Check worstOffenders
        # NOTE(review): jobID 1 appears twice here — presumably what the view
        # returns for this fixture; confirm it is not a fixture typo.
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                         [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {'/TestWorkload/ReReco/LogCollect' : [logCollectPFN for _ in range(10)]})

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                        places = 2)

        return
Esempio n. 17
0
    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report.

        Builds a single multicore job group, runs the TaskArchiverPoller over
        it, checks that WMBS is fully drained, and validates the
        multicore-specific performance averages stored in the couch workload
        summary document.
        """
        dbInterface = threading.currentThread().dbi

        config = self.getConfig()
        specFile = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=specFile)
        self.createTestJobGroup(config=config,
                                name=workload.name(),
                                specLocation=specFile,
                                error=False,
                                multicore=True)

        # The job cache directory has to exist so the archiver has
        # something to clean up.
        jobCache = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                "ReReco")
        os.makedirs(jobCache)
        self.assertTrue(os.path.exists(jobCache))

        summaryDB = CouchServer(config.JobStateMachine.couchurl).connectDatabase(
            config.TaskArchiver.workloadSummaryCouchDBName)

        TaskArchiverPoller(config=config).algorithm()

        # After archiving, every WMBS table involved must be empty.
        for table, message in (("wmbs_job", "No job should have survived"),
                               ("wmbs_subscription", None),
                               ("wmbs_jobgroup", None),
                               ("wmbs_file_details", None)):
            rows = dbInterface.processData(
                "SELECT * FROM %s" % table)[0].fetchall()
            self.assertEqual(len(rows), 0, message)

        workloadSummary = summaryDB.document(id="TestWorkload")

        # Multicore-specific averages recorded by the archiver.
        perf = workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
        for metric, expected in (('minMergeTime', 5.7624950408900002),
                                 ('numberOfMerges', 3.0),
                                 ('averageProcessTime', 29.369966666700002)):
            self.assertAlmostEquals(perf[metric]['average'], expected, places=2)
        return
Esempio n. 18
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        Creates two job groups (ReReco processing plus a LogCollect group
        under a "_2" workflow name), runs TaskArchiverPoller, and verifies
        that WMBS is drained, the job caches are removed, and the couch
        workload summary carries the expected ACDC, output and performance
        content.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=False
        )

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name="%s_2" % workload.name(),
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
        )

        # Job caches must exist up front so we can assert their removal below.
        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        # One subscription per job group exists before archiving.
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        # All WMBS tables touched by the workflow should now be empty.
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        # NOTE(review): the wmbs_fileset query result is overwritten before it
        # is asserted — the fileset emptiness check is effectively skipped.
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, "workloadTest/TestWorkload")))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        dbname = getattr(config.JobStateMachine, "couchDBName")
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        workloadSummary = workdatabase.document(id="TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary["ACDCServer"], sanitizeURL(config.ACDC.couchurl)["url"])

        # Check the output
        self.assertEqual(
            workloadSummary["output"].keys(), ["/Electron/MorePenguins-v0/RECO", "/Electron/MorePenguins-v0/ALCARECO"]
        )

        # Check performance
        # Check histograms
        self.assertEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["AvgEventTime"]["histogram"][0][
                "average"
            ],
            0.062651899999999996,
        )
        self.assertEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["AvgEventTime"]["histogram"][0][
                "nEvents"
            ],
            5,
        )

        # Check standard performance
        self.assertEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["TotalJobCPU"]["average"],
            9.4950600000000005,
        )
        self.assertEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["TotalJobCPU"]["stdDev"],
            8.2912400000000002,
        )

        # Check worstOffenders
        self.assertEqual(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["AvgEventTime"]["worstOffenders"],
            [
                {"logCollect": None, "log": None, "value": "0.894052", "jobID": 1},
                {"logCollect": None, "log": None, "value": "0.894052", "jobID": 2},
                {"logCollect": None, "log": None, "value": "0.894052", "jobID": 3},
            ],
        )

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary["performance"]["/TestWorkload/ReReco/LogCollect"]["cmsRun1"].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ["average", "stdDev"]:
                self.assertEqual(
                    workloadSummary["performance"]["/TestWorkload/ReReco/LogCollect"]["cmsRun1"][x][y],
                    workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"][x][y],
                )

        return
Esempio n. 19
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR

        Creates a job group whose framework job reports carry errors, runs
        the TaskArchiverPoller, and verifies that the couch workload summary
        records the failure time, the error code with its run/lumi
        information, and the per-site failure/error histograms.
        """
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=True
        )

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Return values intentionally discarded: these calls only check that
        # the job/FWJR couch views are reachable before the poller runs.
        jobdb.loadView(
            "JobDump", "jobsByWorkflowName", options={"startkey": [workload.name()], "endkey": [workload.name(), {}]}
        )["rows"]
        fwjrdb.loadView(
            "FWJRDump", "fwjrsByWorkflowName", options={"startkey": [workload.name()], "endkey": [workload.name(), {}]}
        )["rows"]

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(workloadSummary["errors"]["/TestWorkload/ReReco"]["failureTime"], 500)
        # 'in' instead of dict.has_key(), which was removed in Python 3.
        self.assertTrue("99999" in workloadSummary["errors"]["/TestWorkload/ReReco"]["cmsRun1"])
        self.assertEqual(
            workloadSummary["errors"]["/TestWorkload/ReReco"]["cmsRun1"]["99999"]["runs"],
            {"10": [12312]},
            "Wrong lumi information in the summary for failed jobs",
        )

        # Check the failures by site histograms
        self.assertEqual(
            workloadSummary["histograms"]["workflowLevel"]["failuresBySite"]["data"]["T1_IT_CNAF"]["Failed Jobs"], 10
        )
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["data"][
                "T1_IT_CNAF"
            ]["99999"],
            10,
        )
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["data"][
                "T1_IT_CNAF"
            ]["8020"],
            10,
        )
        self.assertEqual(workloadSummary["histograms"]["workflowLevel"]["failuresBySite"]["average"]["Failed Jobs"], 10)
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["average"][
                "99999"
            ],
            10,
        )
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["average"][
                "8020"
            ],
            10,
        )
        self.assertEqual(workloadSummary["histograms"]["workflowLevel"]["failuresBySite"]["stdDev"]["Failed Jobs"], 0)
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["stdDev"][
                "99999"
            ],
            0,
        )
        self.assertEqual(
            workloadSummary["histograms"]["stepLevel"]["/TestWorkload/ReReco"]["cmsRun1"]["errorsBySite"]["stdDev"][
                "8020"
            ],
            0,
        )
        return
Esempio n. 20
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        Creates two job groups (ReReco processing and LogCollect) for one
        workload, runs TaskArchiverPoller, and verifies that WMBS and the
        couch job/FWJR databases are drained, the job caches are removed, and
        the couch workload summary carries the expected ACDC, output,
        performance and retry content.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False)

        # Create second workload
        # NOTE(review): actually a second job group (the LogCollect task)
        # under the same workload name, sharing the same spec file.
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect")

        # Job caches must exist up front so we can assert their removal below.
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))


        # One subscription per job group exists before archiving.
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname       = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 2*self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        # All WMBS tables touched by the workflow should now be empty.
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)



        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO',
                                                            '/Electron/MorePenguins-v0/ALCARECO'])

        # Check performance
        # Check histograms
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                                0.062651899999999996, places = 2)
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                         5)

        # Check standard performance
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'], 9.4950600000000005,
                                places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'], 8.2912400000000002,
                                places = 2)

        # Check worstOffenders
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                         [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 3}])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'0': 10})

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                        places = 2)

        # The TestWorkload should have no jobs left
        workflowName = "TestWorkload"
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 0)
        jobs = fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                               options = {"startkey": [workflowName],
                                          "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 0)
        return
Esempio n. 21
0
    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts

        Creates two job groups (ReReco processing and LogCollect) for one
        workload, runs TaskArchiverPoller, and verifies that WMBS is drained,
        the job caches are removed, and the couch workload summary carries
        the expected ACDC, output, performance, retry and log-archive
        content.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False)

        # Create second workload
        # NOTE(review): actually a second job group (the LogCollect task)
        # under the same workload name, sharing the same spec file.
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect")

        # Job caches must exist up front so we can assert their removal below.
        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload",
                                 "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        # One subscription per job group exists before archiving.
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump",
                              "jobsByWorkflowName",
                              options={
                                  "startkey": [workflowName],
                                  "endkey": [workflowName, {}]
                              })['rows']
        self.assertEqual(len(jobs), 2 * self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        # All WMBS tables touched by the workflow should now be empty.
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        workloadSummary = workdatabase.document(id="TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'],
                         sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(
            sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
                   ['tasks']),
            ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])
        # Check performance
        # Check histograms
        self.assertAlmostEquals(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['average'],
            0.89405199999999996,
            places=2)
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['nEvents'], 10)

        # Check standard performance
        self.assertAlmostEquals(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['average'],
            17.786300000000001,
            places=2)
        self.assertAlmostEquals(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['stdDev'],
            0.0,
            places=2)

        # Check worstOffenders
        # NOTE(review): jobID 1 appears twice here — presumably what the view
        # returns for this fixture; confirm it is not a fixture typo.
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['worstOffenders'], [{
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 1
            }, {
                'logCollect': None,
                'log': None,
                'value': '0.894052',
                'jobID': 2
            }])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                         {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'], {
            '/TestWorkload/ReReco/LogCollect':
            [logCollectPFN for _ in range(10)]
        })

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance'][
                '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(
                    workloadSummary['performance']
                    ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                    workloadSummary['performance']['/TestWorkload/ReReco']
                    ['cmsRun1'][x][y],
                    places=2)

        return
Esempio n. 22
0
    def testB_testErrors(self):
        """
        _testErrors_

        Run the TaskArchiver/CleanCouch cycle over a workload containing a
        failed FWJR and verify the error section and the failures-by-site
        histograms of the resulting workload summary document.
        """
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        # First job group: failed ReReco jobs (error=True produces FWJRs
        # carrying the 99999/8020 exit codes asserted below).
        self.createTestJobGroup(config=config,
                                name=workload.name(),
                                specLocation=workloadPath,
                                error=True)
        # Second job group: LogCollect jobs on a separate fileset.
        self.createTestJobGroup(config=config,
                                name=workload.name(),
                                filesetName="TestFileset_2",
                                specLocation=workloadPath,
                                task="/TestWorkload/ReReco/LogCollect",
                                jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Prime the couch views so they are built before the pollers run;
        # the returned rows themselves are not needed.
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                       options={"startkey": [workload.name()],
                                "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options={"startkey": [workload.name()],
                                 "endkey": [workload.name(), {}]})['rows']

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        # Plain attribute access: the attribute name is a constant, so
        # getattr(config.JobStateMachine, "couchDBName") was redundant.
        dbname = config.JobStateMachine.couchDBName
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
        self.assertEqual(failedRunInfo, {'10': [[12312, 12312]]},
                         "Wrong lumi information in the summary for failed jobs")

        # Check the failures-by-site histograms: all 10 failed jobs ran at
        # T1_IT_CNAF and each carries both exit codes, so every count and
        # average is 10 and every standard deviation is 0.
        workflowHisto = workloadSummary['histograms']['workflowLevel']['failuresBySite']
        stepHisto = (workloadSummary['histograms']['stepLevel']
                     ['/TestWorkload/ReReco']['cmsRun1']['errorsBySite'])
        self.assertEqual(workflowHisto['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workflowHisto['average']['Failed Jobs'], 10)
        self.assertEqual(workflowHisto['stdDev']['Failed Jobs'], 0)
        for errorCode in ('99999', '8020'):
            self.assertEqual(stepHisto['data']['T1_IT_CNAF'][errorCode], 10)
            self.assertEqual(stepHisto['average'][errorCode], 10)
            self.assertEqual(stepHisto['stdDev'][errorCode], 0)
        return