def testC_Profile(self):
    """
    _Profile_

    Profile the CleanCouchPoller algorithm over a large generated job set
    (10 subscriptions x 1000 jobs x 10 files) and print cumulative stats.

    DON'T RUN THIS!
    """
    # Deliberately disabled: the guard return keeps this expensive
    # profiling run out of the normal test suite.  Remove it locally
    # (only) to collect fresh numbers.
    return
    # One import per line (PEP 8); removed the unused
    # 'myThread = threading.currentThread()' local.
    import cProfile
    import pstats

    name = makeUUID()
    config = self.getConfig()
    jobList = self.createGiantJobSet(name=name, config=config,
                                     nSubs=10, nJobs=1000, nFiles=10)

    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()

    # Profile one poller pass and dump the stats sorted by cumulative time.
    cProfile.runctx("cleanCouch.algorithm()", globals(), locals(),
                    filename="testStats.stat")
    p = pstats.Stats('testStats.stat')
    p.sort_stats('cumulative')
    p.print_stats()
    return
def testC_Profile(self):
    """
    _Profile_

    Profile the CleanCouchPoller algorithm over a large generated job set
    (10 subscriptions x 1000 jobs x 10 files) and print cumulative stats.

    DON'T RUN THIS!
    """
    # BUG FIX: the docstring (and the other revision of this method in
    # this file) says this must never run as part of the suite, but the
    # guard return had been dropped here — restore it so the 10k-job
    # profiling pass cannot fire in CI.  Remove it locally to profile.
    return
    import cProfile
    import pstats

    name = makeUUID()
    config = self.getConfig()
    jobList = self.createGiantJobSet(name=name, config=config,
                                     nSubs=10, nJobs=1000, nFiles=10)

    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()

    # Profile one poller pass and dump the stats sorted by cumulative time.
    cProfile.runctx("cleanCouch.algorithm()", globals(), locals(),
                    filename="testStats.stat")
    p = pstats.Stats('testStats.stat')
    p.sort_stats('cumulative')
    p.print_stats()
    return
def preInitialization(self):
    """
    Register the TaskArchiver and CleanCouch pollers with this thread's
    worker manager at their configured poll intervals.
    """
    # FIX: use the print() function form — the original Python 2 print
    # statement is a syntax error under Python 3; output is unchanged.
    print("TaskArchiver.preInitialization")

    # Add event loop to worker manager
    myThread = threading.currentThread()

    pollInterval = self.config.TaskArchiver.pollInterval
    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.info("Setting task archiver poll interval to %s seconds",
                 pollInterval)
    myThread.workerThreadManager.addWorker(TaskArchiverPoller(self.config),
                                           pollInterval)

    couchInterval = self.config.TaskArchiver.cleanCouchInterval
    logging.info("Setting poll interval for cleanup old couch doc to %s seconds",
                 couchInterval)
    myThread.workerThreadManager.addWorker(CleanCouchPoller(self.config),
                                           couchInterval)
    return
def testB_testErrors(self):
    """
    _testErrors_

    Test with a failed FWJR: run the TaskArchiver and CleanCouch pollers
    and verify the error summary written to the workload summary database.
    """
    # Removed the unused 'myThread = threading.currentThread()' local.
    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=workloadPath)
    # First job group carries the failed FWJRs under test.
    testJobGroup = self.createTestJobGroup(config=config, name=workload.name(),
                                           specLocation=workloadPath,
                                           error=True)

    # Create second workload
    testJobGroup2 = self.createTestJobGroup(config=config,
                                            name=workload.name(),
                                            filesetName="TestFileset_2",
                                            specLocation=workloadPath,
                                            task="/TestWorkload/ReReco/LogCollect",
                                            type="LogCollect")

    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    couchdb = CouchServer(config.JobStateMachine.couchurl)
    jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
    fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
    # Prime the couch views so the pollers see the documents.
    jobdb.loadView("JobDump", "jobsByWorkflowName",
                   options={"startkey": [workload.name()],
                            "endkey": [workload.name(), {}]})['rows']
    fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workload.name()],
                             "endkey": [workload.name(), {}]})['rows']

    self.populateWorkflowWithCompleteStatus()
    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()

    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()
    cleanCouch.algorithm()

    dbname = getattr(config.JobStateMachine, "couchDBName")
    workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

    workloadSummary = workdatabase.document(id=workload.name())

    self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
    self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'])
    failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
    self.assertEqual(failedRunInfo, {'10': [[12312, 12312]]},
                     "Wrong lumi information in the summary for failed jobs")

    # Check the failures by site histograms
    self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
    self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
    self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
    self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
    return
def testA_BasicFunctionTest(self):
    """
    _BasicFunctionTest_

    Tests the components, by seeing if they can process a simple set of
    closeouts: run the archiver + couch cleaner, then verify WMBS is
    emptied and the workload summary document is correct.
    """
    myThread = threading.currentThread()
    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=workloadPath)
    testJobGroup = self.createTestJobGroup(config=config, name=workload.name(),
                                           specLocation=workloadPath,
                                           error=False)

    # Create second workload
    testJobGroup2 = self.createTestJobGroup(config=config,
                                            name=workload.name(),
                                            filesetName="TestFileset_2",
                                            specLocation=workloadPath,
                                            task="/TestWorkload/ReReco/LogCollect",
                                            type="LogCollect")

    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "LogCollect")
    os.makedirs(cachePath2)
    self.assertTrue(os.path.exists(cachePath2))

    result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(result), 2)

    workflowName = "TestWorkload"
    dbname = config.TaskArchiver.workloadSummaryCouchDBName
    couchdb = CouchServer(config.JobStateMachine.couchurl)
    workdatabase = couchdb.connectDatabase(dbname)
    jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
    fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
    jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                          options={"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']
    fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workflowName],
                             "endkey": [workflowName, {}]})['rows']
    self.assertEqual(len(jobs), 2 * self.nJobs)

    from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
    create = CreateWMBSBase()
    tables = []
    for x in create.requiredTables:
        tables.append(x[2:])
    # NOTE(review): 'tables' is built but never used below — candidate
    # for removal.

    self.populateWorkflowWithCompleteStatus()
    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()
    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()
    cleanCouch.algorithm()

    # Everything in WMBS should have been cleaned out by the archiver.
    for table in ("wmbs_job", "wmbs_subscription", "wmbs_jobgroup",
                  "wmbs_fileset", "wmbs_file_details"):
        result = myThread.dbi.processData("SELECT * FROM %s" % table)[0].fetchall()
        self.assertEqual(len(result), 0)

    # Make sure we deleted the directory
    self.assertFalse(os.path.exists(cachePath))
    self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

    testWMBSFileset = Fileset(id=1)
    self.assertEqual(testWMBSFileset.exists(), False)

    workloadSummary = workdatabase.document(id="TestWorkload")

    # Check ACDC
    self.assertEqual(workloadSummary['ACDCServer'],
                     sanitizeURL(config.ACDC.couchurl)['url'])

    # Check the output.  BUG FIX: wrap keys() in list() — on Python 3
    # dict_keys never compares equal to a list, so the old assertion
    # would always fail there; on Python 2 the behavior is unchanged.
    self.assertEqual(list(workloadSummary['output'].keys()),
                     ['/Electron/MorePenguins-v0/RECO'])
    self.assertEqual(sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']),
                     ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])

    # Check performance
    # Check histograms.  FIX: assertAlmostEquals is a deprecated alias
    # (removed in Python 3.12); use assertAlmostEqual.
    self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                           0.89405199999999996, places=2)
    self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                     10)

    # Check standard performance
    self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'],
                           17.786300000000001, places=2)
    self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'],
                           0.0, places=2)

    # Check worstOffenders
    self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                     [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                      {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                      {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}])

    # Check retryData
    self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10})

    logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
    self.assertEqual(workloadSummary['logArchives'],
                     {'/TestWorkload/ReReco/LogCollect': [logCollectPFN for _ in range(10)]})

    # LogCollect task is made out of identical FWJRs
    # assert that it is identical
    for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
        if x in config.TaskArchiver.histogramKeys:
            continue
        for y in ['average', 'stdDev']:
            self.assertAlmostEqual(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                   workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                   places=2)
    return
def testB_testErrors(self):
    """
    _testErrors_

    Test with a failed FWJR: run the TaskArchiver and CleanCouch pollers
    and verify the error summary in the workload summary document.
    """
    config = self.getConfig()
    specPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=specPath)

    # First job group carries the failed FWJRs under test.
    failedGroup = self.createTestJobGroup(config=config,
                                          name=workload.name(),
                                          specLocation=specPath,
                                          error=True)

    # Create second workload
    logCollectGroup = self.createTestJobGroup(config=config,
                                              name=workload.name(),
                                              filesetName="TestFileset_2",
                                              specLocation=specPath,
                                              task="/TestWorkload/ReReco/LogCollect",
                                              jobType="LogCollect")

    reRecoCache = os.path.join(config.JobCreator.jobCacheDir,
                               "TestWorkload", "ReReco")
    os.makedirs(reRecoCache)
    self.assertTrue(os.path.exists(reRecoCache))

    couchServer = CouchServer(config.JobStateMachine.couchurl)
    jobDb = couchServer.connectDatabase("%s/jobs" % self.databaseName)
    fwjrDb = couchServer.connectDatabase("%s/fwjrs" % self.databaseName)
    # Prime the couch views so the pollers see the documents.
    jobDb.loadView("JobDump", "jobsByWorkflowName",
                   options={"startkey": [workload.name()],
                            "endkey": [workload.name(), {}]})['rows']
    fwjrDb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workload.name()],
                             "endkey": [workload.name(), {}]})['rows']

    self.populateWorkflowWithCompleteStatus()

    archiver = TaskArchiverPoller(config=config)
    archiver.algorithm()

    couchCleaner = CleanCouchPoller(config=config)
    couchCleaner.setup()
    couchCleaner.algorithm()

    dbname = getattr(config.JobStateMachine, "couchDBName")
    summaryDb = couchServer.connectDatabase("%s/workloadsummary" % dbname)

    workloadSummary = summaryDb.document(id=workload.name())

    reRecoErrors = workloadSummary['errors']['/TestWorkload/ReReco']
    self.assertEqual(reRecoErrors['failureTime'], 500)
    self.assertTrue('99999' in reRecoErrors['cmsRun1'])
    failedRunInfo = reRecoErrors['cmsRun1']['99999']['runs']
    self.assertEqual(failedRunInfo, {'10': [[12312, 12312]]},
                     "Wrong lumi information in the summary for failed jobs")

    # Check the failures by site histograms
    workflowHisto = workloadSummary['histograms']['workflowLevel']['failuresBySite']
    stepHisto = workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']

    self.assertEqual(workflowHisto['data']['T1_IT_CNAF']['Failed Jobs'], 10)
    self.assertEqual(workflowHisto['average']['Failed Jobs'], 10)
    self.assertEqual(workflowHisto['stdDev']['Failed Jobs'], 0)
    for exitCode in ('99999', '8020'):
        self.assertEqual(stepHisto['data']['T1_IT_CNAF'][exitCode], 10)
        self.assertEqual(stepHisto['average'][exitCode], 10)
        self.assertEqual(stepHisto['stdDev'][exitCode], 0)
    return
def testA_BasicFunctionTest(self):
    """
    _BasicFunctionTest_

    Tests the components, by seeing if they can process a simple set of
    closeouts: run the archiver + couch cleaner, then verify WMBS is
    emptied and the workload summary document is correct.
    """
    myThread = threading.currentThread()
    config = self.getConfig()
    workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
    workload = self.createWorkload(workloadName=workloadPath)
    testJobGroup = self.createTestJobGroup(config=config,
                                           name=workload.name(),
                                           specLocation=workloadPath,
                                           error=False)

    # Create second workload
    testJobGroup2 = self.createTestJobGroup(
        config=config,
        name=workload.name(),
        filesetName="TestFileset_2",
        specLocation=workloadPath,
        task="/TestWorkload/ReReco/LogCollect",
        jobType="LogCollect")

    cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
    os.makedirs(cachePath)
    self.assertTrue(os.path.exists(cachePath))

    cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "LogCollect")
    os.makedirs(cachePath2)
    self.assertTrue(os.path.exists(cachePath2))

    result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(result), 2)

    workflowName = "TestWorkload"
    dbname = config.TaskArchiver.workloadSummaryCouchDBName
    couchdb = CouchServer(config.JobStateMachine.couchurl)
    workdatabase = couchdb.connectDatabase(dbname)
    jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
    fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
    jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                          options={"startkey": [workflowName],
                                   "endkey": [workflowName, {}]})['rows']
    fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                    options={"startkey": [workflowName],
                             "endkey": [workflowName, {}]})['rows']
    self.assertEqual(len(jobs), 2 * self.nJobs)

    from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
    create = CreateWMBSBase()
    tables = [x[2:] for x in create.requiredTables]
    # NOTE(review): 'tables' is computed but never used afterwards;
    # kept for parity with the original — candidate for removal.

    self.populateWorkflowWithCompleteStatus()
    testTaskArchiver = TaskArchiverPoller(config=config)
    testTaskArchiver.algorithm()
    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()
    cleanCouch.algorithm()

    # Everything in WMBS should have been cleaned out by the archiver.
    for table in ("wmbs_job", "wmbs_subscription", "wmbs_jobgroup",
                  "wmbs_fileset", "wmbs_file_details"):
        result = myThread.dbi.processData("SELECT * FROM %s" % table)[0].fetchall()
        self.assertEqual(len(result), 0)

    # Make sure we deleted the directory
    self.assertFalse(os.path.exists(cachePath))
    self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))

    testWMBSFileset = Fileset(id=1)
    self.assertEqual(testWMBSFileset.exists(), False)

    workloadSummary = workdatabase.document(id="TestWorkload")

    # Check ACDC
    self.assertEqual(workloadSummary['ACDCServer'],
                     sanitizeURL(config.ACDC.couchurl)['url'])

    # Check the output.  BUG FIX: wrap keys() in list() — on Python 3
    # dict_keys never compares equal to a list, so the old assertion
    # would always fail there; on Python 2 the behavior is unchanged.
    self.assertEqual(list(workloadSummary['output'].keys()),
                     ['/Electron/MorePenguins-v0/RECO'])
    self.assertEqual(
        sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']),
        ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])

    # Check performance
    # Check histograms
    perf = workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
    self.assertAlmostEqual(perf['AvgEventTime']['histogram'][0]['average'],
                           0.89405199999999996, places=2)
    self.assertEqual(perf['AvgEventTime']['histogram'][0]['nEvents'], 10)

    # Check standard performance
    self.assertAlmostEqual(perf['TotalJobCPU']['average'],
                           17.786300000000001, places=2)
    self.assertAlmostEqual(perf['TotalJobCPU']['stdDev'], 0.0, places=2)

    # Check worstOffenders
    self.assertEqual(perf['AvgEventTime']['worstOffenders'],
                     [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                      {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                      {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}])

    # Check retryData
    self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10})

    logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
    self.assertEqual(workloadSummary['logArchives'],
                     {'/TestWorkload/ReReco/LogCollect': [logCollectPFN for _ in range(10)]})

    # LogCollect task is made out of identical FWJRs
    # assert that it is identical
    logCollectPerf = workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1']
    for x in logCollectPerf.keys():
        if x in config.TaskArchiver.histogramKeys:
            continue
        for y in ['average', 'stdDev']:
            self.assertAlmostEqual(logCollectPerf[x][y], perf[x][y], places=2)
    return