class ConfigurationTest(unittest.TestCase):
    """
    Test case for the Configuration object.
    """

    def setUp(self):
        """set up"""
        # Scratch work dir plus target paths for the three save-format
        # variants (plain, documented, commented) the tests write out.
        self.testInit = TestInit(__file__)
        self.testDir = self.testInit.generateWorkDir()
        self.normalSave = "%s/WMCore_Agent_Configuration_t_normal.py" % self.testDir
        self.docSave = "%s/WMCore_Agent_Configuration_t_documented.py" % self.testDir
        self.commentSave = "%s/WMCore_Agent_Configuration_t_commented.py" % self.testDir

    def tearDown(self):
        """clean up"""
        self.testInit.delWorkDir()

    def testA(self):
        """ctor"""
        try:
            config = Configuration()
        except Exception as ex:
            # Fixed: 'except Exception, ex' is Python 2-only syntax and a
            # SyntaxError on Python 3.
            msg = "Failed to instantiate Configuration\n"
            msg += str(ex)
            self.fail(msg)
# NOTE(review): this class definition appears to be an exact duplicate of an
# earlier ConfigurationTest in this file; the later definition shadows the
# earlier one at import time. Confirm whether one copy should be removed.
class ConfigurationTest(unittest.TestCase):
    """
    Test case for the Configuration object.
    """

    def setUp(self):
        """set up"""
        # Scratch work dir plus target paths for the three save-format
        # variants (plain, documented, commented) the tests write out.
        self.testInit = TestInit(__file__)
        self.testDir = self.testInit.generateWorkDir()
        self.normalSave = "%s/WMCore_Agent_Configuration_t_normal.py" % self.testDir
        self.docSave = "%s/WMCore_Agent_Configuration_t_documented.py" % self.testDir
        self.commentSave = "%s/WMCore_Agent_Configuration_t_commented.py" % self.testDir

    def tearDown(self):
        """clean up"""
        self.testInit.delWorkDir()

    def testA(self):
        """ctor"""
        try:
            config = Configuration()
        except Exception as ex:
            # Fixed: 'except Exception, ex' is Python 2-only syntax and a
            # SyntaxError on Python 3.
            msg = "Failed to instantiate Configuration\n"
            msg += str(ex)
            self.fail(msg)
class BossAirTest(unittest.TestCase):
    """
    Tests for the BossAir prototype.

    Requires a live WMBS/BossAir database schema and a CouchDB instance;
    setUp installs the schema and registers test sites in ResourceControl.
    """

    sites = ['T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN', 'T2_US_Florida']

    def setUp(self):
        """
        setup for test.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.tearDown()
        self.testInit.setSchema(customModules=["WMCore.WMBS", "WMCore.BossAir",
                                               "WMCore.ResourceControl", "WMCore.Agent.Database"],
                                useDefault=False)
        self.testInit.setupCouch("bossair_t/jobs", "JobDump")
        self.testInit.setupCouch("bossair_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, pnn='se.%s' % (site),
                                       cmsName=site, ceName=site,
                                       plugin="CondorPlugin", pendingSlots=1000,
                                       runningSlots=2000)
            resourceControl.insertThreshold(siteName=site, taskType='Processing',
                                            maxSlots=1000, pendingSlots=1000)

        # NOTE(review): the three inserts below pass cmsName=site, which is the
        # leaked loop variable (the last entry of self.sites) rather than the
        # site being inserted — looks unintentional, but preserved as-is;
        # confirm intended cmsName values.
        resourceControl.insertSite(siteName='Xanadu', pnn='se.Xanadu',
                                   cmsName=site, ceName='Xanadu',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='Xanadu', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        resourceControl.insertSite(siteName='jade-cms.hip.fi', pnn='madhatter.csc.fi',
                                   cmsName=site, ceName='jade-cms.hip.fi',
                                   plugin="ARCPlugin")
        resourceControl.insertThreshold(siteName='jade-cms.hip.fi', taskType='Processing',
                                        maxSlots=100, pendingSlots=100)

        # using this for glite submissions
        resourceControl.insertSite(siteName='grid-ce-01.ba.infn.it',
                                   pnn='storm-se-01.ba.infn.it',
                                   cmsName=site,
                                   ceName='grid-ce-01.ba.infn.it',
                                   plugin='gLitePlugin')
        resourceControl.insertThreshold(siteName='grid-ce-01.ba.infn.it', taskType='Processing',
                                        maxSlots=50, pendingSlots=50)

        # Create user
        newuser = self.daoFactory(classname="Users.New")
        newuser.execute(dn="tapas", group_name="phgroup", role_name="cmsrole")

        # We actually need the user name
        self.user = getpass.getuser()

        # Change this to the working dir to keep track of error and log files from condor
        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        componentName = 'test'
        self.heartbeatAPI = HeartbeatAPI(componentName)
        self.heartbeatAPI.registerComponent()
        componentName = 'JobTracker'
        self.heartbeatAPI2 = HeartbeatAPI(componentName)
        self.heartbeatAPI2.registerComponent()

        return

    def tearDown(self):
        """
        Database deletion
        """
        #self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _getConfig_

        Build a basic BossAir config
        """
        config = self.testInit.getConfiguration()

        config.section_("Agent")
        config.Agent.agentName = 'testAgent'
        config.Agent.componentName = 'test'
        config.Agent.useHeartbeat = False

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("BossAir")
        config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh'

        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'INFO'
        config.JobSubmitter.pollInterval = 1
        config.JobSubmitter.pluginName = 'AirPlugin'
        config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins'
        config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit')
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", 'stevia.hep.wisc.edu')
        config.JobSubmitter.submitScript = os.path.join(WMCore.WMInit.getWMBASE(),
                                                        'test/python/WMComponent_t/JobSubmitter_t',
                                                        'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), 'config.cfg')

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = 'INFO'
        config.JobTracker.pollInterval = 1

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "bossair_t"

        # JobStatusLite
        config.component_('JobStatusLite')
        config.JobStatusLite.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobStatusLite.stateTimeouts = {'Pending': 10, 'Running': 86400}
        config.JobStatusLite.pollInterval = 1

        return config

    def createTestWorkload(self, workloadName='Test', emulator=True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload("Tier1ReReco")
        rereco = workload.getTask("ReReco")

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.save(workloadName)

        return workload

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None,
                        bl=None, wl=None):
        """
        Creates a series of jobGroups for submissions

        :param nSubs: number of subscriptions (and job groups) to create
        :param nJobs: number of jobs per group
        :param bl: site blacklist applied to each job (default empty)
        :param wl: site whitelist applied to each job (default empty)
        """
        # Fixed: 'bl=[], wl=[]' were mutable default arguments shared across
        # calls; normalise None to a fresh list instead.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        jobGroupList = []

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=makeUUID(), task="basicWorkload/Production",
                                owner_vogroup='phgroup', owner_vorole='cmsrole')
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task, nJobs=nJobs,
                           jobGroup=testJobGroup, fileset=testFileset,
                           sub=testSubscription.exists(), site=site,
                           bl=bl, wl=wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site=None,
                  bl=None, wl=None):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons
        """
        # Fixed: mutable default arguments replaced by a None sentinel.
        bl = [] if bl is None else bl
        wl = [] if wl is None else wl

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024, events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['custom']['location'] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner'] = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Fixed: pickle requires a binary-mode file on Python 3
            # (was open(..., 'w')); 'with' also guarantees the handle closes.
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def createDummyJobs(self, nJobs, location=None):
        """
        _createDummyJobs_

        Create some dummy jobs
        """
        if not location:
            location = self.sites[0]

        nameStr = makeUUID()

        testWorkflow = Workflow(spec=nameStr, owner="tapas",
                                name=nameStr, task="basicWorkload/Production",
                                owner_vogroup='phgroup', owner_vorole='cmsrole')
        testWorkflow.create()

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        jobList = []

        for i in range(nJobs):
            testJob = Job(name='%s-%i' % (nameStr, i))
            testJob['location'] = location
            testJob['custom']['location'] = location
            testJob['userdn'] = 'tapas'
            testJob['owner'] = 'tapas'
            testJob['userrole'] = 'cmsrole'
            testJob['usergroup'] = 'phgroup'

            testJob.create(testJobGroup)
            jobList.append(testJob)

        return jobList

    @attr('integration')
    def testA_APITest(self):
        """
        _APITest_

        This is a commissioning test that has very little to do with anything
        except loading the code.
        """
        #return
        myThread = threading.currentThread()
        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # We should have loaded a plugin
        self.assertTrue('TestPlugin' in baAPI.plugins.keys())

        result = myThread.dbi.processData("SELECT name FROM bl_status")[0].fetchall()
        # list(...) keeps this working whether .values() yields a list
        # (old DB rows) or a view (Python 3 dicts).
        statusList = [list(i.values())[0] for i in result]

        # We should have the plugin states in the database.
        # Fixed: the old check compared statusList.sort() to [...].sort();
        # list.sort() returns None, so it compared None == None and always
        # passed. Compare sorted copies instead.
        self.assertEqual(sorted(statusList), sorted(['New', 'Dead', 'Gone']))

        # Create some jobs
        nJobs = 10
        jobDummies = self.createDummyJobs(nJobs=nJobs)

        baAPI.createNewJobs(wmbsJobs=jobDummies)

        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), nJobs)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nJobs)
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), 0)

        self.assertRaises(BossAirException,
                          baAPI._loadByStatus, status='FalseStatus')

        # Change the job status and update it
        for job in newJobs:
            job['status'] = 'Dead'

        baAPI._updateJobs(jobs=newJobs)

        # Test whether we see the job status as updated
        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), 0)
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), nJobs)

        # Can we load by BossAir ID?
        loadedJobs = baAPI._loadByID(jobs=deadJobs)
        self.assertEqual(len(loadedJobs), nJobs)

        # Can we load via WMBS?
        loadedJobs = baAPI.loadByWMBS(wmbsJobs=jobDummies)
        self.assertEqual(len(loadedJobs), nJobs)

        # See if we can delete jobs
        baAPI._deleteJobs(jobs=deadJobs)

        # Confirm that they're gone
        deadJobs = baAPI._loadByStatus(status='Dead')
        self.assertEqual(len(deadJobs), 0)

        self.assertEqual(len(baAPI.jobs), 0)

        return

    @attr('integration')
    def testB_PluginTest(self):
        """
        _PluginTest_

        Now check that these functions worked if called through plugins
        Instead of directly. There are only three plugin
        """
        #return
        myThread = threading.currentThread()

        config = self.getConfig()

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs, location='Xanadu')
        changeState = ChangeState(config)
        changeState.propagate(jobDummies, 'created', 'new')
        changeState.propagate(jobDummies, 'executing', 'created')

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin'] = 'TestPlugin'
            job['owner'] = 'tapas'

        baAPI.submit(jobs=jobDummies)

        newJobs = baAPI._loadByStatus(status='New')
        self.assertEqual(len(newJobs), nJobs)

        # Should be no more running jobs
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), nJobs)

        # Test Plugin should complete all jobs
        baAPI.track()

        # Should be no more running jobs
        runningJobs = baAPI._listRunJobs()
        self.assertEqual(len(runningJobs), 0)

        # Check if they're complete
        completeJobs = baAPI.getComplete()
        self.assertEqual(len(completeJobs), nJobs)

        # Do this test because BossAir is specifically built
        # to keep it from finding completed jobs
        result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall()
        self.assertEqual(len(result), nJobs)

        baAPI.removeComplete(jobs=jobDummies)

        result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall()
        self.assertEqual(len(result), 0)

        return

    def testG_monitoringDAO(self):
        """
        _monitoringDAO_

        Because I need a test for the monitoring DAO
        """
        # NOTE(review): this early return deliberately disables the body
        # below; everything after it is dead code kept for reference.
        return
        myThread = threading.currentThread()

        config = self.getConfig()

        changeState = ChangeState(config)

        baAPI = BossAirAPI(config=config)

        # Create some jobs
        nJobs = 10

        jobDummies = self.createDummyJobs(nJobs=nJobs)

        # Prior to building the job, each job must have a plugin
        # and user assigned
        for job in jobDummies:
            job['plugin'] = 'TestPlugin'
            job['owner'] = 'tapas'
            job['location'] = 'T2_US_UCSD'
            job.save()

        baAPI.submit(jobs=jobDummies)

        results = baAPI.monitor()

        self.assertEqual(len(results), nJobs)
        for job in results:
            self.assertEqual(job['plugin'], 'CondorPlugin')

        return
class RetryManagerTest(EmulatedUnitTestCase): """ TestCase for TestRetryManager module """ def setUp(self): """ setup for test. """ super(RetryManagerTest, self).setUp() myThread = threading.currentThread() self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) self.testInit.setupCouch("retry_manager_t/jobs", "JobDump") self.testInit.setupCouch("retry_manager_t/fwjrs", "FWJRDump") self.daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.getJobs = self.daofactory(classname="Jobs.GetAllJobs") self.setJobTime = self.daofactory(classname="Jobs.SetStateTime") self.increaseRetry = self.daofactory(classname="Jobs.IncrementRetry") self.testDir = self.testInit.generateWorkDir() self.configFile = EmulatorSetup.setupWMAgentConfig() self.nJobs = 10 return def tearDown(self): """ Database deletion """ self.testInit.clearDatabase() self.testInit.delWorkDir() self.testInit.tearDownCouch() EmulatorSetup.deleteConfig(self.configFile) return def getConfig(self): """ _getConfig_ """ config = self.testInit.getConfiguration() self.testInit.generateWorkDir(config) # First the general stuff config.section_("General") config.General.workDir = os.getenv("TESTDIR", self.testDir) config.section_("CoreDatabase") config.CoreDatabase.connectUrl = os.getenv("DATABASE") config.CoreDatabase.socket = os.getenv("DBSOCK") config.component_("RetryManager") config.RetryManager.logLevel = 'DEBUG' config.RetryManager.namespace = 'WMComponent.RetryManager.RetryManager' config.RetryManager.pollInterval = 10 # These are the cooloff times for the RetryManager, the times it waits # Before attempting resubmission config.RetryManager.section_("DefaultRetryAlgo") config.RetryManager.DefaultRetryAlgo.section_("default") config.RetryManager.DefaultRetryAlgo.default.coolOffTime = { 'create': 120, 'submit': 120, 'job': 120 } # Path to plugin directory 
config.RetryManager.pluginPath = 'WMComponent.RetryManager.PlugIns' config.RetryManager.WMCoreBase = WMCore.WMBase.getWMBASE() config.RetryManager.componentDir = os.path.join( os.getcwd(), 'Components') # ErrorHandler # Not essential, but useful for ProcessingAlgo config.component_("ErrorHandler") config.ErrorHandler.maxRetries = 5 # JobStateMachine config.component_('JobStateMachine') config.JobStateMachine.couchurl = os.getenv('COUCHURL', None) config.JobStateMachine.couchDBName = "retry_manager_t" return config def createTestJobGroup(self, nJobs, subType="Processing", retryOnce=False): """ _createTestJobGroup_ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name=makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subType) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileA.create() testFileB.create() for _ in range(0, nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJobGroup.commit() if retryOnce: self.increaseRetry.execute(testJobGroup.jobs) return testJobGroup def testA_Create(self): """ WMComponent_t.RetryManager_t.RetryManager_t:testCreate() Mimics creation of component and test jobs failed in create stage. 
""" testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'createfailed', 'new') changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed') idList = self.getJobs.execute(state='CreateCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 50) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='CreateCooloff') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 150) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='CreateCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) return def testB_Submit(self): """ WMComponent_t.RetryManager_t.RetryManager_t:testSubmit() Mimics creation of component and test jobs failed in create stage. 
""" testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 50) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 150) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) return def testC_Job(self): """ WMComponent_t.RetryManager_t.RetryManager_t:testJob() Mimics creation of component and test jobs failed in create stage. 
""" testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed') idList = self.getJobs.execute(state='JobCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 50) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='JobCooloff') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 150) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='JobCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) return def testD_SquaredAlgo(self): """ _testSquaredAlgo_ Test the squared algorithm to make sure it loads and works """ testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() config.RetryManager.plugins = {'Processing': 'SquaredAlgo'} config.RetryManager.section_("SquaredAlgo") config.RetryManager.SquaredAlgo.section_("Processing") config.RetryManager.SquaredAlgo.Processing.coolOffTime = { 'create': 10, 'submit': 10, 'job': 10 } changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 
'submitfailed') idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 5) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 12) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) def testE_ExponentialAlgo(self): """ _testExponentialAlgo_ Test the exponential algorithm to make sure it loads and works """ testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() config.RetryManager.plugins = {'Processing': 'ExponentialAlgo'} config.RetryManager.section_("ExponentialAlgo") config.RetryManager.ExponentialAlgo.section_("Processing") config.RetryManager.ExponentialAlgo.Processing.coolOffTime = { 'create': 10, 'submit': 10, 'job': 10 } changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 5) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') 
self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 12) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) def testF_LinearAlgo(self): """ _testLinearAlgo_ Test the linear algorithm to make sure it loads and works """ testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() config.RetryManager.plugins = {'Processing': 'LinearAlgo'} config.RetryManager.section_("LinearAlgo") config.RetryManager.LinearAlgo.section_("Processing") config.RetryManager.LinearAlgo.Processing.coolOffTime = { 'create': 10, 'submit': 10, 'job': 10 } changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff') changer.propagate(testJobGroup.jobs, 'submitfailed', 'created') changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed') idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) testRetryManager = RetryManagerPoller(config) testRetryManager.setup(None) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 5) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 12) testRetryManager.algorithm(None) idList = self.getJobs.execute(state='SubmitCooloff') self.assertEqual(len(idList), 0) idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) return def testG_ProcessingAlgo(self): """ _ProcessingAlgo_ Test 
for the ProcessingAlgo Prototype """ testJobGroup = self.createTestJobGroup(nJobs=self.nJobs) config = self.getConfig() config.RetryManager.plugins = {'Processing': 'ProcessingAlgo'} config.RetryManager.section_("ProcessingAlgo") config.RetryManager.ProcessingAlgo.section_("default") config.RetryManager.ProcessingAlgo.default.coolOffTime = { 'create': 10, 'submit': 10, 'job': 10 } changer = ChangeState(config) fwjrPath = os.path.join(WMCore.WMBase.getTestBase(), "WMComponent_t/JobAccountant_t", "fwjrs/badBackfillJobReport.pkl") report = Report() report.load(fwjrPath) for job in testJobGroup.jobs: job['fwjr'] = report job['retry_count'] = 0 report.save( os.path.join(job['cache_dir'], "Report.%i.pkl" % job['retry_count'])) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed') testRetryManager = RetryManagerPoller(config) testRetryManager.algorithm() idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed') for job in testJobGroup.jobs: j = Job(id=job['id']) j.load() self.assertEqual(j['retry_count'], 1) report.save( os.path.join(j['cache_dir'], "Report.%i.pkl" % j['retry_count'])) config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = [8020] testRetryManager2 = RetryManagerPoller(config) testRetryManager2.algorithm() idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs) for job in testJobGroup.jobs: j = Job(id=job['id']) j.load() self.assertEqual(j['retry_count'], 5) # Now test timeout testJobGroup2 = 
self.createTestJobGroup(nJobs=self.nJobs) # Cycle jobs for job in testJobGroup2.jobs: job['fwjr'] = report job['retry_count'] = 0 report.save( os.path.join(job['cache_dir'], "Report.%i.pkl" % job['retry_count'])) changer.propagate(testJobGroup2.jobs, 'created', 'new') changer.propagate(testJobGroup2.jobs, 'executing', 'created') changer.propagate(testJobGroup2.jobs, 'complete', 'executing') changer.propagate(testJobGroup2.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup2.jobs, 'jobcooloff', 'jobfailed') for job in testJobGroup2.jobs: j = Job(id=job['id']) j.load() self.assertEqual(j['retry_count'], 0) config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = [] config.RetryManager.ProcessingAlgo.default.MaxRunTime = 1 testRetryManager3 = RetryManagerPoller(config) testRetryManager3.algorithm() idList = self.getJobs.execute(state='Created') self.assertEqual(len(idList), self.nJobs * 2) for job in testJobGroup2.jobs: j = Job(id=job['id']) j.load() self.assertEqual(j['retry_count'], 5) return def testH_PauseAlgo(self): """ _testH_PauseAlgo_ Test the pause algorithm, note that given pauseCount = n, the job will run first n + 1 times before being paused. 
After that it will be paused each n times """
        # --- First pass: two failure cycles have already happened before this point;
        # build the job groups and a PauseAlgo config (cooloff 20s, pause after 2 retries).
        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs)
        # adding a 2nd job group (used later for the per-exit-code retry test)
        testJobGroup2 = self.createTestJobGroup(nJobs=self.nJobs)
        config = self.getConfig()
        config.RetryManager.plugins = {'Processing': 'PauseAlgo'}
        config.RetryManager.section_("PauseAlgo")
        config.RetryManager.PauseAlgo.section_("Processing")
        config.RetryManager.PauseAlgo.Processing.coolOffTime = {'create': 20, 'submit': 20, 'job': 20}
        config.RetryManager.PauseAlgo.Processing.pauseCount = 2
        changer = ChangeState(config)
        # Drive the jobs through two full fail/cooloff cycles up front
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)
        report = Report()
        # Making sure that jobs are not created ahead of time (15s < 20s cooloff)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 15)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)
        # Giving time so they can be retried (25s > 20s cooloff)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 25)
        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='created')
        self.assertEqual(len(idList), self.nJobs)
        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        # Make sure that no change happens before timeout
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 75)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)
        # Giving time so they can be paused
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 85)
        # Make sure that the plugin pauses them (pauseCount=2 reached)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='jobpaused')
        self.assertEqual(len(idList), self.nJobs)
        # Emulating ops retrying the job
        changer.propagate(testJobGroup.jobs, 'created', 'jobpaused')
        # Making sure it did the right thing
        idList = self.getJobs.execute(state='created')
        self.assertEqual(len(idList), self.nJobs)
        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        # NOTE(review): 'jobfailed' is propagated twice in a row here (and again
        # below) — looks like a copy/paste duplicate; confirm it is intentional.
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 175)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)
        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 185)
        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='created')
        self.assertEqual(len(idList), self.nJobs)
        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 315)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='jobcooloff')
        self.assertEqual(len(idList), self.nJobs)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 325)
        # Make sure that the plugin allowed them to go back to created state
        # (they pause again, having hit the pause threshold a second time)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='jobpaused')
        self.assertEqual(len(idList), self.nJobs)
        # --- Second part: a configurable retry count per job type {jobExitCodeA: pauseCountB}
        config.RetryManager.PauseAlgo.Processing.retryErrorCodes = {8020: 1, 12345: 1, 5555: 2}
        testRetryManager2 = RetryManagerPoller(config)
        testRetryManager2.algorithm()
        # Attach a canned FWJR (exit code 8020) to every job in the 2nd group
        fwjrPath = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t",
                                "fwjrs/badBackfillJobReport.pkl")
        report.load(fwjrPath)
        for job in testJobGroup2.jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'], "Report.%i.pkl" % job['retry_count']))
        # fail the jobs
        changer.propagate(testJobGroup2.jobs, 'created', 'new')
        changer.propagate(testJobGroup2.jobs, 'executing', 'created')
        changer.propagate(testJobGroup2.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup2.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup2.jobs, 'jobcooloff', 'jobfailed')
        # Giving time so they can be paused
        for job in testJobGroup2.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 85)
        # Make sure that the plugin sent those jobs to the next state:
        testRetryManager2.algorithm()
        # job exit code is 8020, so it is supposed to be retried one time.
        # Meaning, that here we should have 10 jobs (from the first part of the test) in jobpaused
        # and 10 jobs in created state
        idList = self.getJobs.execute(state='created')
        self.assertEqual(len(idList), self.nJobs)
        idList2 = self.getJobs.execute(state='jobpaused')
        self.assertEqual(len(idList2), self.nJobs)
        # save a second job report - with a retry count = 1
        for job in testJobGroup2.jobs:
            j = Job(id=job['id'])
            j.load()
            j['retry_count'] = 1
            self.assertEqual(j['retry_count'], 1)
            report.save(os.path.join(j['cache_dir'], "Report.%i.pkl" % j['retry_count']))
        # Fail them out again
        changer.propagate(testJobGroup2.jobs, 'executing', 'created')
        changer.propagate(testJobGroup2.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup2.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup2.jobs, 'jobcooloff', 'jobfailed')
        for job in testJobGroup2.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 175)
        # not sure if this check is needed:
        idList = self.getJobs.execute(state='jobcooloff')
        self.assertEqual(len(idList), self.nJobs)
        # Giving time so they can be paused
        for job in testJobGroup2.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 85)
        # Make sure that the plugin sent those jobs to paused state:
        testRetryManager2.algorithm(None)
        idList = self.getJobs.execute(state='jobpaused')
        # And again, in total, there should be 10+10=20 jobs in jobpaused
        self.assertEqual(len(idList), self.nJobs * 2)
        return

    def testI_MultipleJobTypes(self):
        """
        _testI_MultipleJobTypes_

        Check that we can configure different retry algorithms for different
        job types, including a default for nonspecified types.
        Also check that two job types can share the same retry algorithm but
        with different parameters
        """
        # Let's create 4 job groups
        processingJobGroup = self.createTestJobGroup(nJobs=10, retryOnce=True)
        productionJobGroup = self.createTestJobGroup(nJobs=15, subType="Production", retryOnce=True)
        mergeJobGroup = self.createTestJobGroup(nJobs=20, subType="Merge", retryOnce=True)
        skimJobGroup = self.createTestJobGroup(nJobs=5, subType="Skim", retryOnce=True)
        # Set an adequate config
        # Processing jobs get the PauseAlgo with pauseCount 4
        # Production jobs get the ExponentialAlgo
        # Merge jobs get the PauseAlgo but with pauseCount 2 which is the default
        # Skim jobs are not configured, so they get the default SquaredAlgo
        config = self.getConfig()
        config.RetryManager.plugins = {'Processing': 'PauseAlgo',
                                       'Production': 'ExponentialAlgo',
                                       'Merge': 'PauseAlgo',
                                       'default': 'SquaredAlgo'}
        config.RetryManager.section_("PauseAlgo")
        config.RetryManager.PauseAlgo.section_("Processing")
        config.RetryManager.PauseAlgo.Processing.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.PauseAlgo.Processing.pauseCount = 4
        config.RetryManager.PauseAlgo.section_("default")
        config.RetryManager.PauseAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}
        config.RetryManager.PauseAlgo.default.pauseCount = 2
        config.RetryManager.section_("ExponentialAlgo")
        config.RetryManager.ExponentialAlgo.section_("Production")
        config.RetryManager.ExponentialAlgo.Production.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.ExponentialAlgo.section_("default")
        config.RetryManager.ExponentialAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}
        config.RetryManager.section_("SquaredAlgo")
        config.RetryManager.SquaredAlgo.section_("Skim")
        config.RetryManager.SquaredAlgo.Skim.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.SquaredAlgo.section_("default")
        config.RetryManager.SquaredAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}
        # Start the state changer and RetryManager
        changer = ChangeState(config)
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)
        # Create the jobs for the first time
        changer.propagate(processingJobGroup.jobs, 'created', 'new')
        # Let's start with the processing jobs and the pauseAlgo
        for count in range(1, 5):
            # Fail the jobs
            changer.propagate(processingJobGroup.jobs, 'executing', 'created')
            changer.propagate(processingJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(processingJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # Check that the cooloff time is strictly enforced
            # First a job time just below the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(len(idList), len(processingJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            # Now above the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            # Make sure the jobs get created again or go to paused
            if count < 4:
                idList = self.getJobs.execute(state='created')
            else:
                idList = self.getJobs.execute(state='jobpaused')
            self.assertEqual(len(idList), len(processingJobGroup.jobs),
                             "Jobs didn't change state correctly")
        # Unpause them so they don't interfere with subsequent tests
        changer.propagate(processingJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(processingJobGroup.jobs, 'executing', 'created')
        # Now the production jobs and the exponential algo
        changer.propagate(productionJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(productionJobGroup.jobs, 'executing', 'created')
            changer.propagate(productionJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(productionJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # ExponentialAlgo: cooloff grows as 30^count
            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - pow(30, count) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(len(idList), len(productionJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - pow(30, count) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='created')
            self.assertEqual(len(idList), len(productionJobGroup.jobs),
                             "Jobs didn't change state correctly")
        # Send them to executing
        changer.propagate(productionJobGroup.jobs, 'executing', 'created')
        # Now the merge jobs and the paused algo with different parameters
        changer.propagate(mergeJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(mergeJobGroup.jobs, 'executing', 'created')
            changer.propagate(mergeJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(mergeJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # Merge uses the PauseAlgo 'default' section: 60s base cooloff,
            # so 30*count^2 - 5 is still inside the cooloff window
            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(len(idList), len(mergeJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 60 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            # pauseCount is 2 for the default section, so the 2nd cycle pauses
            if count < 2:
                idList = self.getJobs.execute(state='created')
            else:
                idList = self.getJobs.execute(state='jobpaused')
            self.assertEqual(len(idList), len(mergeJobGroup.jobs),
                             "Jobs didn't change state correctly")
        # Send them to executing
        changer.propagate(mergeJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(mergeJobGroup.jobs, 'executing', 'created')
        # Now the skim jobs and the squared algo
        changer.propagate(skimJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(skimJobGroup.jobs, 'executing', 'created')
            changer.propagate(skimJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(skimJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # SquaredAlgo: cooloff grows as 30 * count^2
            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='JobCoolOff')
            self.assertEqual(len(idList), len(skimJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID=job["id"],
                                        stateTime=int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state='created')
            self.assertEqual(len(idList), len(skimJobGroup.jobs),
                             "Jobs didn't change state correctly")

    def testY_MultipleIterations(self):
        """
        _MultipleIterations_

        Paranoia based check to see if I'm saving class instances correctly
        """
        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs)
        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'Created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)
        # Below the cooloff threshold: nothing should move
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 50)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)
        # Above the threshold: everything goes back to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 150)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), 0)
        idList = self.getJobs.execute(state='Created')
        self.assertEqual(len(idList), self.nJobs)
        # Make a new jobGroup for a second run
        testJobGroup = self.createTestJobGroup(nJobs=self.nJobs)
        # Set job state
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        # Set them to go off
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 200)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='SubmitCooloff')
        self.assertEqual(len(idList), 0)
        idList = self.getJobs.execute(state='Created')
        self.assertEqual(len(idList), self.nJobs * 2)
        return

    @attr('integration')
    def testZ_Profile(self):
        """
        _Profile_

        Do a basic profiling of the algo
        """
        import pstats
        nJobs = 1000
        testJobGroup = self.createTestJobGroup(nJobs=nJobs)
        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')
        idList = self.getJobs.execute(state='CreateCooloff')
        self.assertEqual(len(idList), nJobs)
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 50)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state='CreateCooloff')
        self.assertEqual(len(idList), nJobs)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID=job["id"], stateTime=int(time.time()) - 150)
        startTime = time.time()
        # cProfile.runctx("testRetryManager.algorithm()", globals(), locals(), filename = "profStats.stat")
        testRetryManager.algorithm(None)
        stopTime = time.time()
        idList = self.getJobs.execute(state='CreateCooloff')
        self.assertEqual(len(idList), 0)
        idList = self.getJobs.execute(state='New')
        self.assertEqual(len(idList), nJobs)
        print("Took %f seconds to run polling algo" % (stopTime - startTime))
        # NOTE(review): the cProfile call that writes profStats.stat is commented
        # out above, so pstats.Stats('profStats.stat') will fail unless a stale
        # profile file exists on disk — confirm this integration test still works.
        p = pstats.Stats('profStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)
        return
class TaskChainTests(unittest.TestCase):
    """
    Unit tests for the TaskChain workload factory: builds a multi-task request
    spec, installs it into WMBS, and verifies the resulting task tree and
    output datasets.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("taskchain_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("taskchain_t")
        self.testInit.generateWorkDir()
        # Populated by the tests that build a workload
        self.workload = None
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testGeneratorWorkflow(self):
        """
        _testGeneratorWorkflow_

        Test creating a request with an initial generator task. It mocks a
        request where there are 2 similar paths starting from the generator,
        each one with a different PrimaryDataset, CMSSW configuration and
        processed dataset. Dropping the RAW output as well.
        Also include an ignored output module to keep things interesting...
        """
        generatorDoc = makeGeneratorConfig(self.configDatabase)
        processorDocs = makeProcessingConfigs(self.configDatabase)

        # Six-task chain: GenSim -> (DigiHLT_new, DigiHLT_ref) -> Reco/ALCAReco -> Skims
        arguments = {
            "AcquisitionEra": "ReleaseValidation",
            "Requestor": "*****@*****.**",
            "CMSSWVersion": "CMSSW_3_5_8",
            "ScramArch": "slc5_ia32_gcc434",
            "ProcessingVersion": 1,
            "GlobalTag": "GR10_P_v4::All",
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
            "SiteWhitelist" : ["T1_CH_CERN", "T1_US_FNAL"],
            "DashboardHost": "127.0.0.1",
            "DashboardPort": 8884,
            "TaskChain" : 6,
            "IgnoredOutputModules" : ["writeSkim2", "writeRAWDEBUGDIGI"],
            "Task1" : {
                "TaskName" : "GenSim",
                "ConfigCacheID" : generatorDoc,
                "SplittingAlgorithm" : "EventBased",
                "SplittingArguments" : {"events_per_job" : 250},
                "RequestNumEvents" : 10000,
                "Seeding" : "Automatic",
                "PrimaryDataset" : "RelValTTBar",
            },
            "Task2" : {
                "TaskName" : "DigiHLT_new",
                "InputTask" : "GenSim",
                "InputFromOutputModule" : "writeGENSIM",
                "ConfigCacheID" : processorDocs['DigiHLT'],
                "SplittingAlgorithm" : "LumiBased",
                "SplittingArguments" : {"lumis_per_job" : 2},
                "CMSSWVersion" : "CMSSW_5_2_6",
                "GlobalTag" : "GR_39_P_V5:All",
                "PrimaryDataset" : "PURelValTTBar",
                "KeepOutput" : False
            },
            "Task3" : {
                "TaskName" : "DigiHLT_ref",
                "InputTask" : "GenSim",
                "InputFromOutputModule" : "writeGENSIM",
                "ConfigCacheID" : processorDocs['DigiHLT'],
                "SplittingAlgorithm" : "EventBased",
                "SplittingArguments" : {"events_per_job" : 100},
                "CMSSWVersion" : "CMSSW_5_2_7",
                "GlobalTag" : "GR_40_P_V5:All",
                "AcquisitionEra" : "ReleaseValidationNewConditions",
                "ProcessingVersion" : 3,
                "ProcessingString" : "Test",
                "KeepOutput" : False
            },
            "Task4" : {
                "TaskName" : "Reco",
                "InputTask" : "DigiHLT_new",
                "InputFromOutputModule" : "writeRAWDIGI",
                "ConfigCacheID" : processorDocs['Reco'],
                "SplittingAlgorithm" : "FileBased",
                "SplittingArguments" : {"files_per_job" : 1},
                "TransientOutputModules" : ["writeRECO"]
            },
            "Task5" : {
                "TaskName" : "ALCAReco",
                "InputTask" : "DigiHLT_ref",
                "InputFromOutputModule" : "writeRAWDIGI",
                "ConfigCacheID" : processorDocs['ALCAReco'],
                "SplittingAlgorithm" : "LumiBased",
                "SplittingArguments" : {"lumis_per_job" : 8},
            },
            "Task6" : {
                "TaskName" : "Skims",
                "InputTask" : "Reco",
                "InputFromOutputModule" : "writeRECO",
                "ConfigCacheID" : processorDocs['Skims'],
                "SplittingAlgorithm" : "LumiBased",
                "SplittingArguments" : {"lumis_per_job" : 10},
            }
        }

        factory = TaskChainWorkloadFactory()

        # Test a malformed task chain definition: reading from a transient
        # output module of an input task must be rejected by the validator
        arguments['Task4']['TransientOutputModules'].append('writeAOD')
        self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)
        arguments['Task4']['TransientOutputModules'].remove('writeAOD')

        try:
            factory.validateSchema(arguments)
            self.workload = factory("PullingTheChain", arguments)
        # NOTE: 'except Exception as ex' replaces the Python 2-only
        # 'except Exception, ex' form; it is valid from Python 2.6 onward.
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)

        self.workload.setSpecUrl("somespec")
        self.workload.setOwnerDetails("*****@*****.**", "DMWM")

        # Install the workload into WMBS and create the top-level subscriptions
        testWMBSHelper = WMBSHelper(self.workload, "GenSim", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # Verify every task in the chain against its request arguments
        firstTask = self.workload.getTaskByPath("/PullingTheChain/GenSim")
        self._checkTask(firstTask, arguments['Task1'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new"),
                        arguments['Task2'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref"),
                        arguments['Task3'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco"),
                        arguments['Task4'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref/ALCAReco"),
                        arguments['Task5'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco/Skims"),
                        arguments['Task6'], arguments)

        # Verify the output datasets
        outputDatasets = self.workload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 11, "Number of output datasets doesn't match")
        self.assertTrue("/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM" in outputDatasets,
                        "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM not in output datasets")
        # writeRECO is transient, so its dataset must NOT appear
        self.assertFalse("/RelValTTBar/ReleaseValidation-reco-v1/RECO" in outputDatasets,
                         "/RelValTTBar/ReleaseValidation-reco-v1/RECO in output datasets")
        self.assertTrue("/RelValTTBar/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
                        "/RelValTTBar/ReleaseValidation-AOD-v1/AOD not in output datasets")
        self.assertTrue("/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
                        "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
        for i in range(1, 5):
            self.assertTrue("/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
                            "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
        # skim2 is in IgnoredOutputModules, so it is excluded from the skim datasets
        for i in range(1, 6):
            if i == 2:
                continue
            self.assertTrue("/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
                            "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
        return
class ReDigiTest(EmulatedUnitTestCase):
    # Tests for the ReDigi workload factory; the enclosing class continues
    # beyond this point (testDependentReDigi is completed further below).

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        super(ReDigiTest, self).setUp()
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("redigi_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False)
        self.testInit.generateWorkDir()
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("redigi_t")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        EmulatorHelper.resetEmulators()
        super(ReDigiTest, self).tearDown()
        return

    def testDependentReDigi(self):
        """
        _testDependentReDigi_

        Verify that a dependent ReDigi workflow that keeps stages out RAW data
        is created and installed into WMBS correctly.
        """
        # Build a three-step ReDigi request from canned config-cache documents
        defaultArguments = ReDigiWorkloadFactory.getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = injectReDigiConfigs(self.configDatabase)
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput"
        defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput"

        factory = ReDigiWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

        # Install the workload into WMBS
        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # Load the filesets the workload should have created
        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()
        stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
stepOneUnmergedRAWFileset.loadData() stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged") stepOneMergedRAWFileset.loadData() stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive") stepOneLogArchiveFileset.loadData() stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive") stepOneMergeLogArchiveFileset.loadData() stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput") stepTwoUnmergedDQMFileset.loadData() stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput") stepTwoUnmergedRECOFileset.loadData() stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged") stepTwoMergedDQMFileset.loadData() stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged") stepTwoMergedRECOFileset.loadData() stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive") stepTwoLogArchiveFileset.loadData() stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive") stepTwoMergeDQMLogArchiveFileset.loadData() stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive") stepTwoMergeRECOLogArchiveFileset.loadData() stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule") 
stepThreeUnmergedAODFileset.loadData() stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged") stepThreeMergedAODFileset.loadData() stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive") stepThreeLogArchiveFileset.loadData() stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive") stepThreeMergeLogArchiveFileset.loadData() stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc") stepOneWorkflow.load() self.assertEqual(stepOneWorkflow.wfType, 'reprocessing') self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(), "Error: Step one missing output module.") self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(), "Error: Step one missing output module.") self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.") for outputMod in stepOneWorkflow.outputMap.keys(): self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepOneSub = 
Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset) stepOneSub.loadData() self.assertEqual(stepOneSub["type"], "Processing", "Error: Step one sub has wrong type.") stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput") stepOneCleanupWorkflow.load() self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0, "Error: Cleanup should have no output.") stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset) stepOneCleanupSub.loadData() self.assertEqual(stepOneCleanupSub["type"], "Cleanup", "Error: Step one sub has wrong type.") stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/LogCollect") stepOneLogCollectWorkflow.load() self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect should have no output.") stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset) stepOneLogCollectSub.loadData() self.assertEqual(stepOneLogCollectSub["type"], "LogCollect", "Error: Step one sub has wrong type.") stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput") stepOneMergeWorkflow.load() self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(), "Error: Step one merge missing output module.") self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(), "Error: Step one merge missing output module.") self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") 
self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.") self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.") for outputMod in stepOneMergeWorkflow.outputMap.keys(): self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset) stepOneMergeSub.loadData() self.assertEqual(stepOneMergeSub["type"], "Merge", "Error: Step one sub has wrong type.") stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc") stepTwoWorkflow.load() self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, 
stepTwoUnmergedDQMFileset.id, "Error: DQM output fileset is wrong.") stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset) stepTwoSub.loadData() self.assertEqual(stepTwoSub["type"], "Processing", "Error: Step two sub has wrong type.") for outputMod in stepTwoWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput") stepTwoCleanupDQMWorkflow.load() self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoCleanupDQMSub.loadData() self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput") stepTwoCleanupRECOWorkflow.load() self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoCleanupRECOSub.loadData() self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect") stepTwoLogCollectWorkflow.load() self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect shouldn't have any output.") stepTwoLogCollectSub = Subscription(workflow = 
# --- continuation of a WMBS-verification helper whose start precedes this chunk ---
# NOTE: this region is a collapsed rendering; each physical line below packs many
# original source lines.  Comments are inserted only between lines, at points that
# are syntactically safe (inside parentheses or between complete statements).
#
# Close out the step-two LogCollect subscription check, then verify the
# step-two RECODEBUG merge workflow: "Merged" and "logArchive" output modules
# must route to the expected filesets, and its subscription over the unmerged
# RECO fileset must be of type "Merge" with exactly one destination per module.
stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset) stepTwoLogCollectSub.loadData() self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect", "Error: Step two sub has wrong type.") stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput") stepTwoMergeRECOWorkflow.load() self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoMergeRECOSub.loadData() self.assertEqual(stepTwoMergeRECOSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput") stepTwoMergeDQMWorkflow.load() self.assertTrue("Merged"
# Same shape of checks for the step-two DQM merge workflow, then the
# step-three processing workflow ("aodOutputModule" + "logArchive" modules,
# "Processing" subscription over the step-two merged RECO fileset).
in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoMergeDQMSub.loadData() self.assertEqual(stepTwoMergeDQMSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc") stepThreeWorkflow.load() self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(), "Error: Step three missing output module.") self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(), "Error: Step three missing output module.") self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
# Step-three cleanup workflow (must have no outputs; "Cleanup" subscription
# over the unmerged AOD fileset) and step-three log-collect workflow (no
# outputs; "LogCollect" subscription over the step-three logArchive fileset).
stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.") stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset) stepThreeSub.loadData() self.assertEqual(stepThreeSub["type"], "Processing", "Error: Step three sub has wrong type.") for outputMod in stepThreeWorkflow.outputMap.keys(): self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule") stepThreeCleanupWorkflow.load() self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0, "Error: Cleanup should have no output.") stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset) stepThreeCleanupSub.loadData() self.assertEqual(stepThreeCleanupSub["type"], "Cleanup", "Error: Step three sub has wrong type.") stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect") stepThreeLogCollectWorkflow.load() self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect should have no output.") stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset) stepThreeLogCollectSub.loadData() self.assertEqual(stepThreeLogCollectSub["type"],
# Step-three AOD merge workflow checks ("Merged"/"logArchive" routing, "Merge"
# subscription), then the helper returns and verifyDiscardRAW() begins: it
# validates the two-step layout installed in WMBS when RAW output is discarded.
"LogCollect", "Error: Step three sub has wrong type.") stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule") stepThreeMergeWorkflow.load() self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(), "Error: Step three merge missing output module.") self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(), "Error: Step three merge missing output module.") self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset) stepThreeMergeSub.loadData() self.assertEqual(stepThreeMergeSub["type"], "Merge", "Error: Step three sub has wrong type.") for outputMod in stepThreeMergeWorkflow.outputMap.keys(): self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") return def verifyDiscardRAW(self): """ _verifyDiscardRAW_ Verify that a workflow that discards the RAW was installed into WMBS correctly.
""" topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock") topLevelFileset.loadData() stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-DQMoutput") stepTwoUnmergedDQMFileset.loadData() stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RECODEBUGoutput") stepTwoUnmergedRECOFileset.loadData() stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-Merged") stepTwoMergedDQMFileset.loadData() stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-Merged") stepTwoMergedRECOFileset.loadData() stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive") stepTwoLogArchiveFileset.loadData() stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-logArchive") stepTwoMergeDQMLogArchiveFileset.loadData() stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-logArchive") stepTwoMergeRECOLogArchiveFileset.loadData() stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc") stepTwoWorkflow.load() self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") 
self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id, "Error: DQM output fileset is wrong.") stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset) stepTwoSub.loadData() self.assertEqual(stepTwoSub["type"], "Processing", "Error: Step two sub has wrong type.") for outputMod in stepTwoWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedDQMoutput") stepTwoCleanupDQMWorkflow.load() self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoCleanupDQMSub.loadData() self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRECODEBUGoutput") stepTwoCleanupRECOWorkflow.load() self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoCleanupRECOSub.loadData() self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", 
task = "/TestWorkload/StepOneProc/LogCollect") stepTwoLogCollectWorkflow.load() self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect shouldn't have any output.") stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset) stepTwoLogCollectSub.loadData() self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect", "Error: Step two sub has wrong type.") stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput") stepTwoMergeRECOWorkflow.load() self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoMergeRECOSub.loadData() self.assertEqual(stepTwoMergeRECOSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoMergeDQMWorkflow = 
Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput") stepTwoMergeDQMWorkflow.load() self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoMergeDQMSub.loadData() self.assertEqual(stepTwoMergeDQMSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") return def verifyKeepAOD(self): """ _verifyKeepAOD_ Verify that a workflow that only produces AOD in a single step was installed correctly into WMBS. 
""" topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock") topLevelFileset.loadData() stepTwoUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-aodOutputModule") stepTwoUnmergedAODFileset.loadData() stepTwoMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-Merged") stepTwoMergedAODFileset.loadData() stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive") stepTwoLogArchiveFileset.loadData() stepTwoMergeAODLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-logArchive") stepTwoMergeAODLogArchiveFileset.loadData() stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc") stepTwoWorkflow.load() self.assertTrue("aodOutputModule" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id, "Error: AOD output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepTwoUnmergedAODFileset.id, "Error: AOD output fileset is wrong.") stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset) stepTwoSub.loadData() self.assertEqual(stepTwoSub["type"], "Processing", "Error: Step two sub has wrong type.") for outputMod in stepTwoWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoCleanupAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = 
"/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedaodOutputModule") stepTwoCleanupAODWorkflow.load() self.assertEqual(len(stepTwoCleanupAODWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupAODSub = Subscription(workflow = stepTwoCleanupAODWorkflow, fileset = stepTwoUnmergedAODFileset) stepTwoCleanupAODSub.loadData() self.assertEqual(stepTwoCleanupAODSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/LogCollect") stepTwoLogCollectWorkflow.load() self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect shouldn't have any output.") stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset) stepTwoLogCollectSub.loadData() self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect", "Error: Step two sub has wrong type.") stepTwoMergeAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule") stepTwoMergeAODWorkflow.load() self.assertTrue("Merged" in stepTwoMergeAODWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeAODWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["output_fileset"].id, 
stepTwoMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") stepTwoMergeAODSub = Subscription(workflow = stepTwoMergeAODWorkflow, fileset = stepTwoUnmergedAODFileset) stepTwoMergeAODSub.loadData() self.assertEqual(stepTwoMergeAODSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeAODWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeAODWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") return def testChainedReDigi(self): """ _testChaninedReDigi_ Verify that a chained ReDigi workflow that discards RAW data can be created and installed into WMBS correctly. This will only verify the step one/step two information in WMBS as the step three information is the same as the dependent workflow. """ defaultArguments = ReDigiWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = "redigi_t" configs = injectReDigiConfigs(self.configDatabase) defaultArguments["StepOneConfigCacheID"] = configs[0] defaultArguments["StepTwoConfigCacheID"] = configs[1] defaultArguments["StepThreeConfigCacheID"] = configs[2] defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput" defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput" defaultArguments["MCPileup"] = PILEUP_DATASET defaultArguments["KeepStepOneOutput"] = False factory = ReDigiWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) # Verify that pileup is configured for both of the cmsRun steps in the # top level task. 
# NOTE(review): the two assertTrue calls below pass the expected dataset as the
# second argument, which unittest treats as the failure *message* — they only
# check that cmsRun{1,2}PileupConfig.mc.dataset is truthy.  The intent looks
# like an equality check (probably against PILEUP_DATASET, which is what the
# workload was built with); confirm the attribute's type/value before changing
# them to assertEqual.
topLevelTask = testWorkload.getTopLevelTask()[0] cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1") cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2") cmsRun1PileupConfig = cmsRun1Helper.getPileup() cmsRun2PileupConfig = cmsRun2Helper.getPileup() self.assertTrue(cmsRun1PileupConfig.mc.dataset, "/some/cosmics/dataset") self.assertTrue(cmsRun2PileupConfig.mc.dataset, "/some/cosmics/dataset") testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.verifyDiscardRAW() return def testThreeStepChainedReDigi(self): """ _testThreeStepChaninedReDigi_ Verify that a chained ReDigi workflow that discards RAW and RECO data can be created and installed into WMBS correctly. """ defaultArguments = ReDigiWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = "redigi_t" configs = injectReDigiConfigs(self.configDatabase) defaultArguments["StepOneConfigCacheID"] = configs[0] defaultArguments["StepTwoConfigCacheID"] = configs[1] defaultArguments["StepThreeConfigCacheID"] = configs[2] defaultArguments["KeepStepOneOutput"] = False defaultArguments["KeepStepTwoOutput"] = False defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput" defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput" factory = ReDigiWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) self.assertTrue(len(testWorkload.getTopLevelTask()) == 1, "Error: Wrong number of top level tasks.") topLevelTask = testWorkload.getTopLevelTask()[0] topLevelStep = topLevelTask.steps() cmsRun2Step = topLevelStep.getStep("cmsRun2").getTypeHelper() self.assertTrue(len(cmsRun2Step.listOutputModules()) == 2, "Error: Wrong number of output modules in cmsRun2.") testWMBSHelper = WMBSHelper(testWorkload,
# Install the three-step workload and verify with verifyKeepAOD(); then
# testCombinedReDigiRecoConfig() builds a workload whose step one uses a
# single combined config (combinedStepOne = True) and verifies it with
# verifyDiscardRAW(); finally testSingleStepReDigi() begins.
"StepOneProc", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.verifyKeepAOD() return def testCombinedReDigiRecoConfig(self): """ _testCombinedReDigiRecoConfig_ Verify that a ReDigi workflow that uses a single step one config installs into WMBS correctly. """ defaultArguments = ReDigiWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = "redigi_t" configs = injectReDigiConfigs(self.configDatabase, combinedStepOne = True) defaultArguments["StepOneConfigCacheID"] = configs[0] defaultArguments["StepTwoConfigCacheID"] = configs[2] defaultArguments["StepOneOutputModuleName"] = "RECODEBUGoutput" factory = ReDigiWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.verifyDiscardRAW() return def testSingleStepReDigi(self): """ _testSingleStepReDigi_ Verify that a single step ReDigi workflow can be created and installed correctly into WMBS.
""" defaultArguments = ReDigiWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = "redigi_t" configs = injectReDigiConfigs(self.configDatabase) defaultArguments["StepOneConfigCacheID"] = configs[2] factory = ReDigiWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.verifyKeepAOD() return
# ReRecoTest: WMBS-installation tests for ReReco workloads.  setUp wires the
# test database + two couch databases ("rereco_t" for configs); tearDown
# removes them.  The inject* helpers commit minimal bogus config-cache
# documents to couch and return their document IDs.
class ReRecoTest(unittest.TestCase): def setUp(self): """ _setUp_ Initialize the database and couch. """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("rereco_t", "ConfigCache") self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) self.testDir = self.testInit.generateWorkDir() couchServer = CouchServer(os.environ["COUCHURL"]) self.configDatabase = couchServer.connectDatabase("rereco_t") myThread = threading.currentThread() self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.listTasksByWorkflow = self.daoFactory( classname="Workflow.LoadFromName") self.listFilesets = self.daoFactory(classname="Fileset.List") self.listSubsMapping = self.daoFactory( classname="Subscriptions.ListSubsAndFilesetsFromWorkflow") return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.tearDownCouch() self.testInit.clearDatabase() self.testInit.delWorkDir() return def injectReRecoConfig(self): """ _injectReRecoConfig_ Inject a ReReco config document that we can use to set the outputModules """ newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7" newConfig["owner"] = {"group": "cmsdataops", "user": "******"} newConfig["pset_tweak_details"] = { "process": { "outputModules_": ['RECOoutput', 'DQMoutput'], "RECOoutput": { 'dataset': { 'filterName': 'RECOoutputFilter', 'dataTier': 'RECO' } }, "DQMoutput": { 'dataset': { 'filterName': 'DQMoutputFilter', 'dataTier': 'DQM' } } } } result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def injectDQMHarvestConfig(self): """ _injectDQMHarvest_ Create a bogus config cache document for DQMHarvest and inject it into couch. Return the ID of the document.
""" newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e234f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7" newConfig["owner"] = {"group": "DATAOPS", "user": "******"} newConfig["pset_tweak_details"] = {"process": {"outputModules_": []}} result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def injectSkimConfig(self): """ _injectSkimConfig_ Create a bogus config cache document for the skims and inject it into couch. Return the ID of the document. """ newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7" newConfig["owner"] = {"group": "cmsdataops", "user": "******"} newConfig["pset_tweak_details"] = { "process": { "outputModules_": ["SkimA", "SkimB"], "SkimA": { "dataset": { "filterName": "SkimAFilter", "dataTier": "RAW-RECO" } }, "SkimB": { "dataset": { "filterName": "SkimBFilter", "dataTier": "USER" } } } } result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def testReReco(self): """ _testReReco_ Verify that ReReco workflows can be created and inserted into WMBS correctly.
""" skimConfig = self.injectSkimConfig() recoConfig = self.injectReRecoConfig() dataProcArguments = ReRecoWorkloadFactory.getTestArguments() dataProcArguments["ProcessingString"] = "ProcString" dataProcArguments["ConfigCacheID"] = recoConfig dataProcArguments.update({ "SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": skimConfig }) dataProcArguments["CouchURL"] = os.environ["COUCHURL"] dataProcArguments["CouchDBName"] = "rereco_t" dataProcArguments["EnableHarvesting"] = True dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig() factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", dataProcArguments) self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput. \ tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \ Merged.mergedLFNBase, '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1') testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing") procWorkflow.load() self.assertEqual(len(procWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["RECOoutput", "DQMoutput"] for goldenOutputMod in goldenOutputMods: mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod, 
"Error: Unmerged output fileset is wrong.") logArchOutput = procWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") 
topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algo.") unmergedReco = Fileset( name="/TestWorkload/DataProcessing/unmerged-RECOoutput") unmergedReco.loadData() recoMergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput") recoMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") unmergedDqm = Fileset( name="/TestWorkload/DataProcessing/unmerged-DQMoutput") unmergedDqm.loadData() dqmMergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput") dqmMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for procOutput in ["RECOoutput", "DQMoutput"]: unmerged = Fileset( name="/TestWorkload/DataProcessing/unmerged-%s" % procOutput) unmerged.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") 
self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset( name="/TestWorkload/DataProcessing/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive" ) procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect" ) procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive" ) procLogCollect.loadData() procLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect" ) procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") skimWorkflow = Workflow( name="TestWorkload", task= 
"/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim" ) skimWorkflow.load() self.assertEqual(len(skimWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["SkimA", "SkimB"] for goldenOutputMod in goldenOutputMods: mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = skimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() 
unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged" ) topLevelFileset.loadData() skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() mergeWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput) mergeWorkflow.load() mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow) mergeSubscription.loadData() 
self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: skimMergeLogCollect = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput) skimMergeLogCollect.loadData() skimMergeLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput)) skimMergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") dqmWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged" ) dqmWorkflow.load() topLevelFileset = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged" ) topLevelFileset.loadData() dqmSubscription = 
Subscription(fileset=topLevelFileset, workflow=dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") logArchOutput = dqmWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") dqmHarvestLogCollect = Fileset( name= "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive" ) dqmHarvestLogCollect.loadData() dqmHarvestLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect" ) dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return def testReRecoDroppingRECO(self): """ _testReRecoDroppingRECO_ Verify that ReReco workflows can be created and inserted into WMBS correctly. The ReReco workflow is just a DataProcessing workflow with skims tacked on. 
This tests run on unmerged RECO output """ skimConfig = self.injectSkimConfig() recoConfig = self.injectReRecoConfig() dataProcArguments = ReRecoWorkloadFactory.getTestArguments() dataProcArguments["ProcessingString"] = "ProcString" dataProcArguments["ConfigCacheID"] = recoConfig dataProcArguments.update({ "SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": skimConfig }) dataProcArguments["CouchURL"] = os.environ["COUCHURL"] dataProcArguments["CouchDBName"] = "rereco_t" dataProcArguments["TransientOutputModules"] = ["RECOoutput"] dataProcArguments["EnableHarvesting"] = True dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig() factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", dataProcArguments) self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \ SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \ Merged.mergedLFNBase, '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1') testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim") skimWorkflow.load() self.assertEqual(len(skimWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["SkimA", "SkimB"] for goldenOutputMod in goldenOutputMods: mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][ "merged_output_fileset"] unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, 
"/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = skimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, 
"/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset( name="/TestWorkload/DataProcessing/unmerged-RECOoutput") topLevelFileset.loadData() skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset( name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput) mergeWorkflow.load() mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: unmerged = Fileset( name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput) unmerged.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") for skimOutput in ["A", "B"]: skimMergeLogCollect = Fileset( name= "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput) skimMergeLogCollect.loadData() skimMergeLogCollectWorkflow = Workflow( name="TestWorkload", task= 
"/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput)) skimMergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return def testMemCoresSettings(self): """ _testMemCoresSettings_ Make sure the multicore and memory setings are properly propagated to all tasks and steps. """ skimConfig = self.injectSkimConfig() recoConfig = self.injectReRecoConfig() dataProcArguments = ReRecoWorkloadFactory.getTestArguments() dataProcArguments["ConfigCacheID"] = recoConfig dataProcArguments.update({ "SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": skimConfig }) dataProcArguments["CouchURL"] = os.environ["COUCHURL"] dataProcArguments["CouchDBName"] = "rereco_t" dataProcArguments["EnableHarvesting"] = True dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig() factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", dataProcArguments) # test default values taskPaths = { '/TestWorkload/DataProcessing': ['cmsRun1', 'stageOut1', 'logArch1'], '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim': ['cmsRun1', 'stageOut1', 'logArch1'], '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged': ['cmsRun1', 'upload1', 'logArch1'] } for task in taskPaths: taskObj = testWorkload.getTaskByPath(task) for step in taskPaths[task]: stepHelper = taskObj.getStepHelper(step) self.assertEqual(stepHelper.getNumberOfCores(), 1) self.assertEqual(stepHelper.getNumberOfStreams(), 0) # FIXME: not sure whether we should set performance parameters to Harvest jobs?!? 
if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged': continue # then test Memory requirements perfParams = taskObj.jobSplittingParameters()['performance'] self.assertEqual(perfParams['memoryRequirement'], 2300.0) # now test case where args are provided dataProcArguments["Multicore"] = 6 dataProcArguments["Memory"] = 4600.0 dataProcArguments["EventStreams"] = 3 testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", dataProcArguments) for task in taskPaths: taskObj = testWorkload.getTaskByPath(task) for step in taskPaths[task]: stepHelper = taskObj.getStepHelper(step) if not task.endswith('DQMHarvestMerged') and step == 'cmsRun1': self.assertEqual(stepHelper.getNumberOfCores(), dataProcArguments["Multicore"]) self.assertEqual(stepHelper.getNumberOfStreams(), dataProcArguments["EventStreams"]) elif step in ('stageOut1', 'upload1', 'logArch1'): self.assertEqual(stepHelper.getNumberOfCores(), 1) self.assertEqual(stepHelper.getNumberOfStreams(), 0) else: self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task) self.assertEqual(stepHelper.getNumberOfStreams(), 0) # FIXME: not sure whether we should set performance parameters to Harvest jobs?!? 
if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged': continue # then test Memory requirements perfParams = taskObj.jobSplittingParameters()['performance'] self.assertEqual(perfParams['memoryRequirement'], dataProcArguments["Memory"]) return def testFilesets(self): """ Test workflow tasks, filesets and subscriptions creation """ # expected tasks, filesets, subscriptions, etc expOutTasks = [ '/TestWorkload/DataProcessing', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput' ] expWfTasks = [ '/TestWorkload/DataProcessing', '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput', '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB', 
'/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect', '/TestWorkload/DataProcessing/LogCollect' ] expFsets = [ 'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW', '/TestWorkload/DataProcessing/unmerged-RECOoutput', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive', '/TestWorkload/DataProcessing/unmerged-DQMoutput', '/TestWorkload/DataProcessing/unmerged-logArchive' ] subMaps = [ (15, 
'/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect', 'MinFileBased', 'LogCollect'), (16, '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingDQMoutputMergeLogCollect', 'MinFileBased', 'LogCollect'), (14, '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged', 'Harvest', 'Harvesting'), (11, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect', 'MinFileBased', 'LogCollect'), (3, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim', 'FileBased', 'Skim'), (5, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA/SomeSkimSkimAMergeLogCollect', 'MinFileBased', 'LogCollect'), (8, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/merged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB/SomeSkimSkimBMergeLogCollect', 'MinFileBased', 'LogCollect'), (10, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimLogCollect', 'MinFileBased', 'LogCollect'), (6, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA', 
'/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimA', 'SiblingProcessingBased', 'Cleanup'), (4, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimA', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimA', 'ParentlessMergeBySize', 'Merge'), (9, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkimB', 'SiblingProcessingBased', 'Cleanup'), (7, '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-SkimB', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkimB', 'ParentlessMergeBySize', 'Merge'), (17, '/TestWorkload/DataProcessing/unmerged-DQMoutput', '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedDQMoutput', 'SiblingProcessingBased', 'Cleanup'), (13, '/TestWorkload/DataProcessing/unmerged-DQMoutput', '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput', 'ParentlessMergeBySize', 'Merge'), (18, '/TestWorkload/DataProcessing/unmerged-logArchive', '/TestWorkload/DataProcessing/LogCollect', 'MinFileBased', 'LogCollect'), (12, '/TestWorkload/DataProcessing/unmerged-RECOoutput', '/TestWorkload/DataProcessing/DataProcessingCleanupUnmergedRECOoutput', 'SiblingProcessingBased', 'Cleanup'), (2, '/TestWorkload/DataProcessing/unmerged-RECOoutput', '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput', 'ParentlessMergeBySize', 'Merge'), (1, 'TestWorkload-DataProcessing-/MinimumBias/ComissioningHI-v1/RAW', '/TestWorkload/DataProcessing', 'EventAwareLumiBased', 'Processing') ] testArguments = ReRecoWorkloadFactory.getTestArguments() testArguments["ConfigCacheID"] = self.injectReRecoConfig() testArguments.update({ "SkimName1": "SomeSkim", "SkimInput1": "RECOoutput", "Skim1ConfigCacheID": self.injectSkimConfig() }) testArguments["CouchURL"] = os.environ["COUCHURL"] 
testArguments["CouchDBName"] = "rereco_t" testArguments["EnableHarvesting"] = True testArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig() factory = ReRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", testArguments) testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", blockName=testArguments['InputDataset'], cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) print("Tasks producing output:\n%s" % pformat(testWorkload.listOutputProducingTasks())) self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") print("List of workflow tasks:\n%s" % pformat([item['task'] for item in workflows])) self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() print("List of filesets:\n%s" % pformat([item[1] for item in filesets])) self.assertItemsEqual([item[1] for item in filesets], expFsets) subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) print("List of subscriptions:\n%s" % pformat(subscriptions)) self.assertItemsEqual(subscriptions, subMaps)
class JobSubmitterTest(unittest.TestCase):
    """
    _JobSubmitterTest_

    Test class for the JobSubmitterPoller
    """

    def setUp(self):
        """
        _setUp_

        Standard setup: Now with 100% more couch
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMCore.BossAir",
                                                 "WMCore.ResourceControl", "WMCore.Agent.Database"])
        self.testInit.setupCouch("jobsubmitter_t/jobs", "JobDump")
        self.testInit.setupCouch("jobsubmitter_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.baDaoFactory = DAOFactory(package = "WMCore.BossAir",
                                       logger = myThread.logger,
                                       dbinterface = myThread.dbi)

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        return

    def tearDown(self):
        """
        _tearDown_

        Standard tearDown
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Utility to set resource thresholds
        """
        if not options:
            options = {'state' : 'Normal',
                       'runningSlots' : 10,
                       'pendingSlots' : 5,
                       'tasks' : ['Processing', 'Merge'],
                       'Processing' : {'pendingSlots' : 5,
                                       'runningSlots' : 10},
                       'Merge' : {'pendingSlots' : 2,
                                  'runningSlots' : 5}}

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = site, seName = 'se.%s' % (site),
                                   ceName = site, plugin = "MockPlugin",
                                   pendingSlots = options['pendingSlots'],
                                   runningSlots = options['runningSlots'],
                                   cmsName = site)
        for task in options['tasks']:
            resourceControl.insertThreshold(siteName = site, taskType = task,
                                            maxSlots = options[task]['runningSlots'],
                                            pendingSlots = options[task]['pendingSlots'])
        if options.get('state'):
            resourceControl.changeSiteState(site, options.get('state'))

        return

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        bl = None, wl = None, taskType = 'Processing', name = None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions.

        bl/wl are the site black/white lists; they default to empty.
        (None defaults instead of mutable [] defaults, which would be
        shared across calls.)
        """
        bl = bl if bl is not None else []
        wl = wl if wl is not None else []

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec = workloadSpec, owner = "mnorman",
                                name = name, task = "basicWorkload/Production")
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name = name)
            testFileset.create()
            testSubscription = Subscription(fileset = testFileset,
                                            workflow = testWorkflow,
                                            type = taskType,
                                            split_algo = "FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription = testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name = name, task = task,
                           nJobs = nJobs,
                           jobGroup = testJobGroup,
                           fileset = testFileset,
                           sub = testSubscription.exists(),
                           site = site, bl = bl, wl = wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site,
                  bl = None, wl = None):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons
        """
        bl = bl if bl is not None else []
        wl = wl if wl is not None else []

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            #site = self.sites[0]
            testFile = File(lfn = "/singleLfn/%s/%s" % (name, n),
                            size = 1024, events = 10)
            if isinstance(site, list):
                for singleSite in site:
                    testFile.setLocation(singleSite)
            else:
                testFile.setLocation(site)
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name = '%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['priority'] = 101
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # Pickle requires a binary-mode file; 'with' guarantees it is closed
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Gets a basic config from default location
        """
        config = Configuration()

        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = 'testAgent'
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        #First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        #Now the CoreDatabase information
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir and MockPlugin configuration
        config.section_("BossAir")
        config.BossAir.pluginNames = ['MockPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.multicoreTaskTypes = ['MultiProcessing', 'MultiProduction']
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_("MockPlugin")
        config.BossAir.MockPlugin.fakeReport = os.path.join(getTestBase(),
                                                            'WMComponent_t/JobSubmitter_t',
                                                            "submit.sh")

        # JobSubmitter configuration
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'DEBUG'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.submitScript = os.path.join(getTestBase(),
                                                        'WMComponent_t/JobSubmitter_t',
                                                        'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(self.testDir, 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200

        #JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "jobsubmitter_t"
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'

        # Needed, because this is a test
        os.makedirs(config.JobSubmitter.componentDir)

        return config

    def createTestWorkload(self, workloadName = 'Tier1ReReco'):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload(workloadName)

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        return workload

    def testA_BasicTest(self):
        """
        Use the MockPlugin to create a simple test
        Check to see that all the jobs were "submitted",
        don't care about thresholds
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 20
        site = 'T2_US_UCSD'

        self.setResourceThresholds(site, pendingSlots = 50, runningSlots = 100,
                                   tasks = ['Processing', 'Merge'],
                                   Processing = {'pendingSlots' : 50,
                                                 'runningSlots' : 100},
                                   Merge = {'pendingSlots' : 50,
                                            'runningSlots' : 100})

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % site)

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter = JobSubmitterPoller(config = config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid = jobId)
            self.assertEqual(loc, [['T2_US_UCSD']])

        # Run another cycle, it shouldn't submit anything. There isn't anything to submit
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nSubs = 1
        nJobs = 10

        # Submit another 10 jobs
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % site,
                                            taskType = "Merge")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Check that the jobs are available for submission and run another cycle
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), nSubs * nJobs)
        jobSubmitter.algorithm()

        #Check that the last 10 jobs were submitted as well.
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), nSubs * nJobs)

        return

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 50, runningSlots = 200,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 45,
                                                     'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10,
                                                'runningSlots' : 20,
                                                'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid = jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted, there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            taskType = 'Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 25 running 0 pending and processing is left at 215 running 0 pending
        # Processing has 135 jobs in queue and Merge 285
        # This tests all threshold dynamics including the prioritization of merge over processing
        nSubs = 1
        nJobs = 300
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                                 task = workload.getTask("ReReco"),
                                                 workloadSpec = os.path.join(self.testDir,
                                                                             'workloadTest',
                                                                             workloadName),
                                                 site = 'se.%s' % 'T1_US_FNAL',
                                                 taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        for _ in range(5):
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(result), 215)
        result = getJobsAction.execute(state = 'Created', jobType = 'Processing')
        self.assertEqual(len(result), 135)
        result = getJobsAction.execute(state = 'Executing', jobType = 'Merge')
        self.assertEqual(len(result), 25)
        result = getJobsAction.execute(state = 'Created', jobType = 'Merge')
        self.assertEqual(len(result), 285)

        return

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 10, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 50,
                                                     'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10,
                                                'runningSlots' :-1,
                                                'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'OldestWorkflow')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                                 task = workload.getTask("ReReco"),
                                                 workloadSpec = os.path.join(self.testDir,
                                                                             'workloadTest',
                                                                             workloadName),
                                                 site = 'se.%s' % 'T1_US_FNAL',
                                                 taskType = 'Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Create a newer workflow processing, and after some new jobs for an old workflow
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL',
                                            name = 'NewestWorkflow')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                                 task = workload.getTask("ReReco"),
                                                 workloadSpec = os.path.join(self.testDir,
                                                                             'workloadTest',
                                                                             workloadName),
                                                 site = 'se.%s' % 'T1_US_FNAL',
                                                 name = 'OldestWorkflow'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Move pending jobs to running
        getRunJobID = self.baDaoFactory(classname = "LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname = "SetStatus")

        for idx in range(2):
            result = getJobsAction.execute(state = 'Executing')
            binds = []
            for jobId in result:
                binds.append({'id' : jobId, 'retry_count' : 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state = 'Created', jobType = "Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state = 'Executing', jobType = "Merge")
            self.assertEqual(len(result), 10)
            result = getJobsAction.execute(state = 'Created', jobType = "Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(classname = "Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow['name'], 'OldestWorkflow')

        return

    def testD_WhiteListBlackList(self):
        """
        _testD_WhiteListBlackList_

        Test the whitelist/blacklist implementation
        Trust the jobCreator to get this in the job right
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T2_CH_CERN', 'T3_CO_Uniandes']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 1000, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 5000,
                                                     'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 1000,
                                                'runningSlots' :-1,
                                                'priority' : 5})

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = 'se.%s' % sites[-1],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            bl = sites[:-1])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config = config)

        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at T3_CO_Uniandes
        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertEqual(loc, 'T3_CO_Uniandes')

        # Run again and test the whiteList
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = 'se.%s' % 'T2_CH_CERN',
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            wl = ['T2_CH_CERN'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Run it
        jobSubmitter.algorithm()

        # You'll have jobs from the previous run still in the database
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        # All jobs should be at CERN or Uniandes
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict[nSubs * nJobs:]:
            loc = entry['site_name']
            self.assertEqual(loc, 'T2_CH_CERN')

        # Run again with an invalid whitelist
        # After this point, the original two sets of jobs will be executing
        # The rest of the jobs should move to submitFailed
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = 'se.%s' % 'T2_CH_CERN',
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            wl = ['T2_US_Namibia'])

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Jobs should be gone
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Run again with all sites blacklisted
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            site = ['se.%s' % x for x in sites],
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            bl = sites)

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)
        result = getJobsAction.execute(state = 'SubmitFailed', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs * 2)

        return

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Test the behavior of the submitter in response to the different
        states of the sites
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 20

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL']
        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 10, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10,
                                                     'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10,
                                                'runningSlots' :-1,
                                                'priority' : 5})

        myResourceControl = ResourceControl()
        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')

        # First test that we prefer Normal over drain, and T1 over T2/T3
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config = config)

        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname = "Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid' : x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertNotEqual(loc, 'T2_US_Florida')

        # Now set everything to down, check we don't submit anything
        for site in sites:
            myResourceControl.changeSiteState(site, 'Down')
        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Nothing is submitted despite the empty slots at Uniandes and Florida
        result = getJobsAction.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since they can only run at one place
        for site in sites:
            myResourceControl.changeSiteState(site, 'Aborted')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs = nSubsMerge, nJobs = nJobsMerge,
                                            site = ['se.%s' % x for x in sites],
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            taskType = 'Merge')

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state = 'SubmitFailed', jobType = 'Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)
        result = getJobsAction.execute(state = 'Executing', jobType = 'Processing')
        self.assertEqual(len(result), nSubs * nJobs)

        return

    @attr('performance')
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Submit a lot of jobs and test how long it takes for
        them to actually be submitted
        """
        workloadName = "basicWorkload"
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        sites = ['T1_US_FNAL']

        for site in sites:
            self.setResourceThresholds(site, pendingSlots = 20000, runningSlots = -1,
                                       tasks = ['Processing', 'Merge'],
                                       Processing = {'pendingSlots' : 10000,
                                                     'runningSlots' :-1},
                                       Merge = {'pendingSlots' : 10000,
                                                'runningSlots' :-1,
                                                'priority' : 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config = config)

        jobGroupList = self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                            task = workload.getTask("ReReco"),
                                            workloadSpec = os.path.join(self.testDir,
                                                                        'workloadTest',
                                                                        workloadName),
                                            site = 'se.%s' % 'T1_US_FNAL')
        jobGroupList.extend(self.createJobGroups(nSubs = nSubs, nJobs = nJobs,
                                                 task = workload.getTask("ReReco"),
                                                 workloadSpec = os.path.join(self.testDir,
                                                                             'workloadTest',
                                                                             workloadName),
                                                 site = 'se.%s' % 'T1_US_FNAL',
                                                 taskType = 'Merge'))

        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Actually run it
        startTime = time.time()
        cProfile.runctx("jobSubmitter.algorithm()", globals(), locals(),
                        filename = "testStats.stat")
        stopTime = time.time()

        # print as a function call so this line is valid on Python 2 and 3
        print("Job took %f seconds to complete" % (stopTime - startTime))

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
class RetryManagerTest(unittest.TestCase):
    """
    TestCase for the RetryManager component.

    Exercises the RetryManagerPoller with its various cooloff plugins
    (Default, Squared, Exponential, Linear, Processing, Pause) by driving
    WMBS jobs through failure/cooloff state transitions and back-dating
    their state times so the poller considers them ready for retry.
    """

    def setUp(self):
        """
        setup for test.

        Builds a WMBS schema, the Couch job/fwjr dump databases and the
        DAOs used to inspect and back-date job state.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        self.testInit.setupCouch("retry_manager_t/jobs", "JobDump")
        self.testInit.setupCouch("retry_manager_t/fwjrs", "FWJRDump")

        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.getJobs    = self.daofactory(classname = "Jobs.GetAllJobs")
        # SetStateTime lets the tests pretend a job entered its current
        # state in the past, so cooloff windows can be tested quickly.
        self.setJobTime = self.daofactory(classname = "Jobs.SetStateTime")
        self.testDir = self.testInit.generateWorkDir()
        self.nJobs = 10
        return

    def tearDown(self):
        """
        Database deletion
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _getConfig_

        Build a minimal agent Configuration for the RetryManager component.
        """
        config = Configuration()

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket     = os.getenv("DBSOCK")

        config.component_("RetryManager")
        config.RetryManager.logLevel     = 'DEBUG'
        config.RetryManager.namespace    = 'WMComponent.RetryManager.RetryManager'
        config.RetryManager.maxRetries   = 10
        config.RetryManager.pollInterval = 10
        # These are the cooloff times for the RetryManager, the times it waits
        # before attempting resubmission
        config.RetryManager.coolOffTime  = {'create': 120, 'submit': 120, 'job': 120}
        # Path to plugin directory
        config.RetryManager.pluginPath   = 'WMComponent.RetryManager.PlugIns'
        #config.RetryManager.pluginName   = ''
        config.RetryManager.WMCoreBase   = WMCore.WMBase.getWMBASE()
        config.RetryManager.componentDir = os.path.join(os.getcwd(), 'Components')

        # ErrorHandler
        # Not essential, but useful for ProcessingAlgo
        config.component_("ErrorHandler")
        config.ErrorHandler.maxRetries = 5

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl    = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = "retry_manager_t"

        return config

    def createTestJobGroup(self, nJobs):
        """
        _createTestJobGroup_

        Creates a group of several jobs attached to one workflow,
        fileset and subscription; each job gets its own cache directory.
        """
        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = makeUUID(), task = "Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        # BUGFIX: the original set testFileA's location a second time here
        # (copy-paste), leaving testFileB without any location.
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        for _ in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)

        testJobGroup.commit()
        return testJobGroup

    def testCreatePaused(self):
        """
        _testCreatePaused_

        Drive jobs through repeated create failures with the PauseAlgo:
        after pauseCount retries the jobs must land in 'createpaused',
        and an operator resubmission resets the cycle.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'PauseAlgo'
        config.RetryManager.coolOffTime = {'create': 20, 'submit': 20, 'job': 20}
        config.RetryManager.pauseCount  = 2
        changer = ChangeState(config)

        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        # Making sure that jobs are in createcooloff state
        idList = self.getJobs.execute(state = 'CreateCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 20)

        # Make sure that the plugin allowed them to go back to new state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'new')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 400)

        # pauseCount reached: the plugin must pause them now
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'createpaused')
        self.assertEqual(len(idList), self.nJobs)

        # emulating ops retrying the job
        changer.propagate(testJobGroup.jobs, 'new', 'createpaused')

        # Making sure it did the right thing
        idList = self.getJobs.execute(state = 'new')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 8000)

        # Make sure that the plugin allowed them to go back to new state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'new')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 160000)

        # Second pause threshold: jobs must be paused again
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'createpaused')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testSubmitPaused(self):
        """
        _testSubmitPaused_

        Same as testCreatePaused but for the submit stage: repeated submit
        failures with the PauseAlgo must eventually land in 'submitpaused'.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'PauseAlgo'
        config.RetryManager.coolOffTime = {'create': 20, 'submit': 20, 'job': 20}
        config.RetryManager.pauseCount  = 2
        changer = ChangeState(config)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        # Making sure that jobs are in submitcooloff state
        idList = self.getJobs.execute(state = 'SubmitCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 20)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 400)

        # Make sure they end up in submitpaused
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'submitpaused')
        self.assertEqual(len(idList), self.nJobs)

        # emulating ops retrying the job
        changer.propagate(testJobGroup.jobs, 'created', 'submitpaused')

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 8000)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 160000)

        # Make sure they end up finally (again) in the paused state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'submitpaused')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testA_Create(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testCreate()

        Mimics creation of component and test jobs failed in create stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Still inside the 120s create cooloff window: nothing may move
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Past the cooloff window: all jobs must be released back to New
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'New')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testB_Submit(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testSubmit()

        Mimics creation of component and test jobs failed in submit stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Still inside the cooloff window: nothing may move
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Past the cooloff window: jobs go back to Created for resubmission
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testC_Job(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testJob()

        Mimics creation of component and test jobs failed in job (run) stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName = 'DefaultRetryAlgo'
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Still inside the cooloff window: nothing may move
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Past the cooloff window: jobs are released to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testD_SquaredAlgo(self):
        """
        _testSquaredAlgo_

        Test the squared algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'SquaredAlgo'
        config.RetryManager.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Two failure cycles so the retry count rises above one
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

    def testE_ExponentialAlgo(self):
        """
        _testExponentialAlgo_

        Test the exponential algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'ExponentialAlgo'
        config.RetryManager.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Two failure cycles so the retry count rises above one
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

    def testF_LinearAlgo(self):
        """
        _testLinearAlgo_

        Test the linear algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        # NOTE(review): this test is named Linear but configures the
        # ExponentialAlgo plugin — looks like a copy-paste from testE.
        # Switching to 'LinearAlgo' may require retuning the back-dated
        # stateTime offsets below, so it is only flagged here.
        config.RetryManager.pluginName  = 'ExponentialAlgo'
        config.RetryManager.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Two failure cycles so the retry count rises above one
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testG_ProcessingAlgo(self):
        """
        _ProcessingAlgo_

        Test for the ProcessingAlgo Prototype
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'ProcessingAlgo'
        config.RetryManager.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        fwjrPath = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t",
                                "fwjrs/badBackfillJobReport.pkl")
        report = Report()
        report.load(fwjrPath)
        for job in testJobGroup.jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'],
                                     "Report.%i.pkl" % job['retry_count']))
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.algorithm()

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        # Cycle them through a second failure
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 1)
            report.save(os.path.join(j['cache_dir'],
                                     "Report.%i.pkl" % j['retry_count']))

        # With the error code in the one-more list, the algo should bump
        # the retry count straight to maxRetries / 2 (observed value 5)
        config.RetryManager.ProcessingAlgoOneMoreErrorCodes = [8020]
        testRetryManager2 = RetryManagerPoller(config)
        testRetryManager2.algorithm()

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 5)

        # Now test timeout
        testJobGroup2 = self.createTestJobGroup(nJobs = self.nJobs)

        # Cycle jobs
        for job in testJobGroup2.jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'],
                                     "Report.%i.pkl" % job['retry_count']))
        changer.propagate(testJobGroup2.jobs, 'created', 'new')
        changer.propagate(testJobGroup2.jobs, 'executing', 'created')
        changer.propagate(testJobGroup2.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup2.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup2.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup2.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 0)

        config.RetryManager.ProcessingAlgoOneMoreErrorCodes = []
        config.RetryManager.ProcessingAlgoMaxRuntime = 1
        testRetryManager3 = RetryManagerPoller(config)
        testRetryManager3.algorithm()

        # Both job groups should now be in Created
        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs * 2)

        for job in testJobGroup2.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 5)

        return

    def testH_PauseAlgo(self):
        """
        _testH_PauseAlgo_

        Drive jobs through repeated run-stage failures with the PauseAlgo:
        after pauseCount retries the jobs must land in 'jobpaused'.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.pluginName  = 'PauseAlgo'
        config.RetryManager.coolOffTime = {'create': 20, 'submit': 20, 'job': 20}
        config.RetryManager.pauseCount  = 2
        changer = ChangeState(config)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        # Making sure that jobs are in jobcooloff state
        idList = self.getJobs.execute(state = 'JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 20)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        # Make sure that no change happens before timeout
        idList = self.getJobs.execute(state = 'JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 400)

        # pauseCount reached: the plugin must pause them now
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'jobpaused')
        self.assertEqual(len(idList), self.nJobs)

        # emulating ops retrying the job
        # NOTE(review): the jobs were just asserted to be in 'jobpaused',
        # yet this propagates with old state 'createpaused' (likely a
        # copy-paste from testCreatePaused). Left untouched because the
        # state-machine transition 'jobpaused' -> 'new' may not be legal;
        # confirm against ChangeState's transition table before fixing.
        changer.propagate(testJobGroup.jobs, 'new', 'createpaused')

        # Making sure it did the right thing
        idList = self.getJobs.execute(state = 'new')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 8000)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 160000)

        # Second pause threshold: jobs must be paused again
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'jobpaused')
        self.assertEqual(len(idList), self.nJobs)

        return

    def testY_MultipleIterations(self):
        """
        _MultipleIterations_

        Paranoia based check to see if I'm saving class instances correctly
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'New')
        self.assertEqual(len(idList), self.nJobs)

        # Make a new jobGroup for a second run
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        # Set job state
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        # Set them to go off
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 200)

        testRetryManager.algorithm(None)

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'New')
        self.assertEqual(len(idList), self.nJobs * 2)

        return

    def testZ_Profile(self):
        """
        _Profile_

        Do a basic profiling of the algo
        """
        # Profiling is disabled by default; remove this early return to
        # run the (slow) profile below.
        return

        import cProfile, pstats

        nJobs = 1000

        testJobGroup = self.createTestJobGroup(nJobs = nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        startTime = time.time()
        #cProfile.runctx("testRetryManager.algorithm()", globals(), locals(), filename = "profStats.stat")
        testRetryManager.algorithm(None)
        stopTime = time.time()

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'New')
        self.assertEqual(len(idList), nJobs)

        print("Took %f seconds to run polling algo" % (stopTime - startTime))

        p = pstats.Stats('profStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return
class MonteCarloFromGENTest(unittest.TestCase): def setUp(self): """ _setUp_ Initialize the database. """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("mclhe_t", "ConfigCache") self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) couchServer = CouchServer(os.environ["COUCHURL"]) self.configDatabase = couchServer.connectDatabase("mclhe_t") self.testDir = self.testInit.generateWorkDir() return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def injectConfig(self): """ _injectConfig_ Create a bogus config cache document and inject it into couch. Return the ID of the document. """ newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7" newConfig["owner"] = {"group": "cmsdataops", "user": "******"} newConfig["pset_tweak_details"] = {"process": {"outputModules_": ["outputRECORECO", "outputALCARECOALCARECO"], "outputRECORECO": {"dataset": {"filterName": "FilterRECO", "dataTier": "RECO"}}, "outputALCARECOALCARECO": {"dataset": {"filterName": "FilterALCARECO", "dataTier": "ALCARECO"}}}} result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def testMonteCarloFromGEN(self): """ _testMonteCarloFromGEN_ Create a MonteCarloFromGEN workflow and verify it installs into WMBS correctly. 
""" arguments = MonteCarloFromGENWorkloadFactory.getTestArguments() arguments["ConfigCacheID"] = self.injectConfig() arguments["CouchDBName"] = "mclhe_t" arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias" factory = MonteCarloFromGENWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments) outputDatasets = testWorkload.listOutputDatasets() self.assertEqual(len(outputDatasets), 2) self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets) self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets) productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN') splitting = productionTask.jobSplittingParameters() self.assertFalse(splitting["deterministicPileup"]) testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath = self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/MonteCarloFromGEN") procWorkflow.load() self.assertEqual(len(procWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") self.assertEqual(procWorkflow.wfType, 'production') goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"] for goldenOutputMod in goldenOutputMods: mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = 
procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = 
                                                   "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        # Top-level processing subscription: MC-from-GEN runs as a
        # "Production" subscription split with EventAwareLumiBased.
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        # Merge subscription for the RECO output fileset.
        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        # Merge subscription for the ALCARECO output fileset.
        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        # Every unmerged output fileset must also carry a cleanup subscription.
        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        # LogCollect subscription for the processing task's log archive.
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        # LogCollect subscription for the RECO merge task's log archive.
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        # LogCollect subscription for the ALCARECO merge task's log archive.
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return

    def testMCFromGENWithPileup(self):
        """
        _testMCFromGENWithPileup_

        Create a MonteCarloFromGEN workload with MC and data pileup inputs
        and verify that the pileup datasets and the deterministic-pileup
        flag are propagated to the cmsRun step and splitting parameters.
        """
        arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        # Add pileup inputs
        arguments["MCPileup"] = "/some/cosmics-procstringwhatever-v1/RAW"
        arguments["DataPileup"] = "/some/minbias-procstringwhatever-v1/LHE"
        arguments["DeterministicPileup"] = True

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

        # The pileup configuration must land on the cmsRun step helper.
        productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
        cmsRunStep = productionTask.getStep("cmsRun1").getTypeHelper()
        pileupData = cmsRunStep.getPileup()
        self.assertEqual(pileupData.data.dataset, ["/some/minbias-procstringwhatever-v1/LHE"])
        self.assertEqual(pileupData.mc.dataset, ["/some/cosmics-procstringwhatever-v1/RAW"])

        # ... and the deterministic-pileup flag on the splitting parameters.
        splitting = productionTask.jobSplittingParameters()
        self.assertTrue(splitting["deterministicPileup"])

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory settings are properly propagated
        to all tasks and steps.
        """
        defaultArguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        defaultArguments["ConfigCacheID"] = self.injectConfig()
        defaultArguments["CouchDBName"] = "mclhe_t"
        defaultArguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

        # test default values
        taskObj = testWorkload.getTask('MonteCarloFromGEN')
        for step in ('cmsRun1', 'stageOut1', 'logArch1'):
            stepHelper = taskObj.getStepHelper(step)
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        defaultArguments["Multicore"] = 6
        defaultArguments["Memory"] = 4600.0
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)
        taskObj = testWorkload.getTask('MonteCarloFromGEN')
        for step in ('cmsRun1', 'stageOut1', 'logArch1'):
            stepHelper = taskObj.getStepHelper(step)
            if step == 'cmsRun1':
                # only the cmsRun step picks up the Multicore setting
                self.assertEqual(stepHelper.getNumberOfCores(), defaultArguments["Multicore"])
            else:
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
        # then test Memory requirements
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], defaultArguments["Memory"])

        return
class URLFetcherTest(unittest.TestCase):
    """
    Main test for the URLFetcher fetcher: builds a task whose sandbox
    references a URL and checks the fetcher downloads it per step.
    """

    def setUp(self):
        """
        Basic setUp: logging, DB connection and a scratch work directory.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        Basic tearDown: remove the scratch work directory.
        """
        self.testInit.delWorkDir()
        return

    def createTask(self, fileURL):
        """
        _createTask_

        Create a test task with two steps; each step gets a per-step
        directory under testDir and carries fileURL in its sandbox section.
        """
        task = WMTask.makeWMTask("testTask")
        task.makeStep("step1")
        task.makeStep("step2")
        for t in task.steps().nodeIterator():
            t = WMStep.WMStepHelper(t)
            os.mkdir('%s/%s' % (self.testDir, t.name()))
            t.data.sandbox.section_('file0')
            t.data.sandbox.file0.src = fileURL
        return task

    def testA_BasicFunction(self):
        """
        _BasicFunction_

        Test and see if we can retrieve a basic file
        URL should not have http:// prefix in it
        """
        url = 'cmsweb.cern.ch'
        task = self.createTask(fileURL = 'http://%s' % url)
        fetcher = URLFetcher()
        fetcher.setWorkingDirectory(workingDir = self.testDir)
        fetcher(wmTask = task)

        # Context manager guarantees the handle is closed even if read() raises
        # (the previous open/read/close sequence leaked the handle on error).
        with open(os.path.join(self.testDir, 'step2', url)) as f:
            content = f.read()

        # A successful fetch of the CMSWEB front page contains these markers.
        for x in ['html', 'CMS']:
            self.assertNotEqual(content.find(x), -1)
class URLFetcherTest(unittest.TestCase):
    """
    Main test for the URLFetcher.

    NOTE(review): this re-defines URLFetcherTest; if an identical class
    appears earlier in this file, this definition shadows it at import
    time — confirm the duplication is intentional and remove one copy.
    """

    def setUp(self):
        """
        Basic setUp: logging, DB connection and a scratch work directory.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        Basic tearDown: remove the scratch work directory.
        """
        self.testInit.delWorkDir()
        return

    def createTask(self, fileURL):
        """
        _createTask_

        Create a test task with two steps; each step gets a per-step
        directory under testDir and carries fileURL in its sandbox section.
        """
        task = WMTask.makeWMTask("testTask")
        task.makeStep("step1")
        task.makeStep("step2")
        for t in task.steps().nodeIterator():
            t = WMStep.WMStepHelper(t)
            os.mkdir('%s/%s' % (self.testDir, t.name()))
            t.data.sandbox.section_('file0')
            t.data.sandbox.file0.src = fileURL
        return task

    def testA_BasicFunction(self):
        """
        _BasicFunction_

        Test and see if we can retrieve a basic file
        URL should not have http:// prefix in it
        """
        url = 'cmsweb.cern.ch'
        task = self.createTask(fileURL='http://%s' % url)
        fetcher = URLFetcher()
        fetcher.setWorkingDirectory(workingDir=self.testDir)
        fetcher(wmTask=task)

        # Context manager guarantees the handle is closed even if read() raises
        # (the previous open/read/close sequence leaked the handle on error).
        with open(os.path.join(self.testDir, 'step2', url)) as f:
            content = f.read()

        # A successful fetch of the CMSWEB front page contains these markers.
        for x in ['html', 'CMS']:
            self.assertNotEqual(content.find(x), -1)
class TaskArchiverTest(unittest.TestCase):
    """
    TestCase for TestTaskArchiver module: exercises the TaskArchiverPoller
    against WMBS + CouchDB fixtures and checks that finished workflows are
    archived, summarized and cleaned out of the database.
    """
    _setup_done = False
    _teardown = False
    _maxMessage = 10
    # Owner DN used for all test workflows; falls back to a generic value
    # when the OWNERDN environment variable is not set.
    OWNERDN = os.environ['OWNERDN'] if 'OWNERDN' in os.environ else "Generic/OWNERDN"

    def setUp(self):
        """
        setup for test.

        Creates WMBS + DBS3Buffer schema, three Couch databases
        (workloadsummary, jobs, fwjrs) and a scratch work directory.
        """
        myThread = threading.currentThread()
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer"],
                                useDefault = False)
        self.databaseName = "taskarchiver_t_0"
        self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")
        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        self.getJobs = self.daofactory(classname = "Jobs.GetAllJobs")
        self.inject = self.daofactory(classname = "Workflow.MarkInjectedWorkflows")
        self.testDir = self.testInit.generateWorkDir()
        os.makedirs(os.path.join(self.testDir, 'specDir'))
        self.nJobs = 10
        self.campaignName = 'aCampaign'
        self.alertsReceiver = None
        # Toggled by testPublishJSONCreate before re-running testA.
        self.uploadPublishInfo = False
        self.uploadPublishDir = None
        return

    def tearDown(self):
        """
        Database deletion
        """
        myThread = threading.currentThread()
        self.testInit.clearDatabase(modules = ["WMCore.WMBS"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        # Shut down any alerts receiver a test left running.
        if self.alertsReceiver:
            self.alertsReceiver.shutdown()
            self.alertsReceiver = None
        return

    def getConfig(self):
        """
        _createConfig_

        General config file: sections for JobStateMachine, JobCreator,
        TaskArchiver, ACDC, Alert, BossAir and Agent.
        """
        config = self.testInit.getConfiguration()
        #self.testInit.generateWorkDir(config)
        config.section_("General")
        config.General.workDir = "."
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv52.fnal.gov:5984")
        config.JobStateMachine.couchDBName = self.databaseName
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary'
        config.component_("JobCreator")
        config.JobCreator.jobCacheDir = os.path.join(self.testDir, 'testDir')
        config.component_("TaskArchiver")
        config.TaskArchiver.componentDir = self.testDir
        config.TaskArchiver.WorkQueueParams = {}
        config.TaskArchiver.pollInterval = 60
        config.TaskArchiver.logLevel = 'INFO'
        config.TaskArchiver.timeOut = 0
        config.TaskArchiver.histogramKeys = ['AvgEventTime', 'writeTotalMB']
        config.TaskArchiver.histogramBins = 5
        config.TaskArchiver.histogramLimit = 5
        config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName
        config.TaskArchiver.workloadSummaryCouchURL = config.JobStateMachine.couchurl
        config.TaskArchiver.requireCouch = True
        config.TaskArchiver.uploadPublishInfo = self.uploadPublishInfo
        config.TaskArchiver.uploadPublishDir = self.uploadPublishDir
        config.TaskArchiver.userFileCacheURL = os.getenv('UFCURL', 'http://cms-xen38.fnal.gov:7725/userfilecache/')
        config.section_("ACDC")
        config.ACDC.couchurl = config.JobStateMachine.couchurl
        config.ACDC.database = config.JobStateMachine.couchDBName
        # Make the jobCacheDir
        os.mkdir(config.JobCreator.jobCacheDir)
        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"
        config.section_("BossAir")
        config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh'
        config.BossAir.gliteConf = '/afs/cern.ch/cms/LCG/LCG-2/UI/conf/glite_wms_CERN.conf'
        config.BossAir.credentialDir = '/home/crab/ALL_SETUP/credentials/'
        config.BossAir.gLiteProcesses = 2
        config.BossAir.gLitePrefixEnv = "/lib64/"
        config.BossAir.pluginNames = ["gLitePlugin"]
        config.BossAir.proxyDir = "/tmp/credentials"
        config.BossAir.manualProxyPath = os.environ['X509_USER_PROXY'] if 'X509_USER_PROXY' in os.environ else None
        config.section_("Agent")
        config.Agent.serverDN = "/we/bypass/myproxy/logon"
        return config

    def createWorkload(self, workloadName = 'Test', emulator = True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload("Tier1ReReco")
        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()
        workload.setCampaign(self.campaignName)
        workload.save(workloadName)
        return workload

    def createTestJobGroup(self, config, name = "TestWorkthrough",
                           filesetName = "TestFileset",
                           specLocation = "spec.xml", error = False,
                           task = "/TestWorkload/ReReco", multicore = False):
        """
        Creates a group of several jobs

        Builds a workflow/fileset/subscription with self.nJobs jobs, loads
        real pickled framework job reports (error, multicore or merge
        variants), and walks the jobs through the state machine to cleanout.
        """
        myThread = threading.currentThread()
        testWorkflow = Workflow(spec = specLocation, owner = self.OWNERDN,
                                name = name, task = task,
                                owner_vogroup="", owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names = [name], injected = True)
        testWMBSFileset = Fileset(name = filesetName)
        testWMBSFileset.create()
        testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')
        testFileA.create()
        testFileB.create()
        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)
        outputWMBSFileset = Fileset(name = '%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn = "/this/is/a/lfnC" , size = 1024, events = 10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)
        testWorkflow.addOutput('output', outputWMBSFileset)
        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()
        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()
        for i in range(0,self.nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313])
            testJobGroup.add(testJob)
        testJobGroup.commit()
        changer = ChangeState(config)
        report1 = Report()
        report2 = Report()
        # Pick the FWJR fixtures to attach: failed, multicore, or merge.
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = path1
        elif multicore:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
            path2 = path1
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport2.pkl')
        report1.load(filename = path1)
        report2.load(filename = path2)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        # First half of the jobs get report1, the rest report2.
        for i in range(self.nJobs):
            if i < self.nJobs/2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')
        testSubscription.completeFiles([testFileA, testFileB])
        return testJobGroup

    def createGiantJobSet(self, name, config, nSubs = 10, nJobs = 10,
                          nFiles = 1, spec = "spec.xml"):
        """
        Creates a massive set of jobs

        nSubs subscriptions, nJobs jobs each, nFiles output files per job;
        all jobs are driven to the cleanout state.  Used by the (disabled)
        profiling/timing tests.
        """
        jobList = []
        for i in range(0, nSubs):
            # Make a bunch of subscriptions
            localName = '%s-%i' % (name, i)
            testWorkflow = Workflow(spec = spec, owner = self.OWNERDN,
                                    name = localName, task="Test",
                                    owner_vogroup="", owner_vorole="")
            testWorkflow.create()
            testWMBSFileset = Fileset(name = localName)
            testWMBSFileset.create()
            testSubscription = Subscription(fileset = testWMBSFileset,
                                            workflow = testWorkflow)
            testSubscription.create()
            testJobGroup = JobGroup(subscription = testSubscription)
            testJobGroup.create()
            filesToComplete = []
            for j in range(0, nJobs):
                # Create jobs for each subscription
                testFileA = File(lfn = "%s-%i-lfnA" % (localName, j) , size = 1024, events = 10)
                testFileA.addRun(Run(10, *[11,12,13,14,15,16,17,18,19,20,
                                           21,22,23,24,25,26,27,28,29,30,
                                           31,32,33,34,35,36,37,38,39,40]))
                testFileA.setLocation('malpaquet')
                testFileA.create()
                testWMBSFileset.addFile(testFileA)
                testWMBSFileset.commit()
                filesToComplete.append(testFileA)
                testJob = Job(name = '%s-%i' % (localName, j))
                testJob.addFile(testFileA)
                testJob['retry_count'] = 1
                testJob['retry_max'] = 10
                testJobGroup.add(testJob)
                jobList.append(testJob)
                for k in range(0, nFiles):
                    # Create output files
                    testFile = File(lfn = "%s-%i-output" % (localName, k) , size = 1024, events = 10)
                    testFile.addRun(Run(10, *[12312]))
                    testFile.setLocation('malpaquet')
                    testFile.create()
                    testJobGroup.output.addFile(testFile)
                testJobGroup.output.commit()
            testJobGroup.commit()
            changer = ChangeState(config)
            changer.propagate(testJobGroup.jobs, 'created', 'new')
            changer.propagate(testJobGroup.jobs, 'executing', 'created')
            changer.propagate(testJobGroup.jobs, 'complete', 'executing')
            changer.propagate(testJobGroup.jobs, 'success', 'complete')
            changer.propagate(testJobGroup.jobs, 'cleanout', 'success')
            testWMBSFileset.markOpen(0)
            testSubscription.completeFiles(filesToComplete)
        return jobList

    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False)
        # Create second workload
        testJobGroup2 = self.createTestJobGroup(config = config,
                                                name = workload.name(),
                                                filesetName = "TestFileset_2",
                                                specLocation = workloadPath,
                                                task = "/TestWorkload/ReReco/LogCollect")
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))
        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)
        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        # Both job groups dumped their jobs to Couch: 2 * nJobs documents.
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 2*self.nJobs)
        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        for x in create.requiredTables:
            tables.append(x[2:])
        # NOTE(review): 'tables' is built but not referenced below — looks
        # like dead code left from an earlier version; confirm and remove.
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()
        # After the poller runs, everything must be purged from WMBS.
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload')))
        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)
        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url'])
        # Check the output
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO',
                          '/Electron/MorePenguins-v0/ALCARECO'])
        # Check performance
        # Check histograms
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'],
                                0.062651899999999996, places = 2)
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'],
                         5)
        # Check standard performance
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'],
                                9.4950600000000005, places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'],
                                8.2912400000000002, places = 2)
        # Check worstOffenders
        self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'],
                         [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2},
                          {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 3}])
        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'0': 10})
        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                                        workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y],
                                        places = 2)
        # The TestWorkload should have no jobs left
        workflowName = "TestWorkload"
        jobs = jobdb.loadView("JobDump", "jobsByWorkflowName",
                              options = {"startkey": [workflowName],
                                         "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 0)
        jobs = fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                               options = {"startkey": [workflowName],
                                          "endkey": [workflowName, {}]})['rows']
        self.assertEqual(len(jobs), 0)
        return

    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()
        dbname = getattr(config.JobStateMachine, "couchDBName")
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)
        workloadSummary = workdatabase.document(id = workload.name())
        # The bad FWJR must surface in the workload summary's error section.
        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        self.assertTrue(workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'].has_key('99999'))
        self.assertEquals(workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs'],
                          {'10' : [12312]},
                          "Wrong lumi information in the summary for failed jobs")
        return

    def atestC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        # Disabled: prefixed with 'a' and returns immediately so unittest
        # never executes the profiling body below.
        return
        import cProfile, pstats
        myThread = threading.currentThread()
        name = makeUUID()
        config = self.getConfig()
        jobList = self.createGiantJobSet(name = name, config = config,
                                         nSubs = 10, nJobs = 1000, nFiles = 10)
        testTaskArchiver = TaskArchiverPoller(config = config)
        cProfile.runctx("testTaskArchiver.algorithm()", globals(), locals(),
                        filename = "testStats.stat")
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return

    def atestD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        # Disabled: returns immediately (see atestC_Profile).
        return
        myThread = threading.currentThread()
        name = makeUUID()
        config = self.getConfig()
        jobList = self.createGiantJobSet(name = name, config = config,
                                         nSubs = 10, nJobs = 1000, nFiles = 10)
        testTaskArchiver = TaskArchiverPoller(config = config)
        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id = 1)
        self.assertEqual(testWMBSFileset.exists(), False)
        logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))

    def atestTaskArchiverPollerAlertsSending_notifyWorkQueue(self):
        """
        Cause exception (alert-worthy situation) in the TaskArchiverPoller
        notifyWorkQueue method.
        """
        # Disabled: returns immediately (see atestC_Profile).
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config = config)
        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)
        # prepare such input as will go until where it expectantly
        # fails and shall send an alert
        # this will currently fail in the TaskArchiverPoller killSubscriptions
        # on trying to access .load() method which items of below don't have.
        # should anything change in the TaskArchiverPoller without modifying this
        # test accordingly, it may be failing ...
        print "failures 'AttributeError: 'dict' object has no attribute 'load' expected ..."
        subList = [{'id': 1}, {'id': 2}, {'id': 3}]
        testTaskArchiver.notifyWorkQueue(subList)
        # wait for the generated alert to arrive
        while len(handler.queue) < len(subList):
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]
        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent (expect this many failures)
        self.assertEqual(len(handler.queue), len(subList))
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")

    def atestTaskArchiverPollerAlertsSending_killSubscriptions(self):
        """
        Cause exception (alert-worthy situation) in the TaskArchiverPoller
        killSubscriptions method.
        (only 1 situation out of two tested).
        """
        # Disabled: returns immediately (see atestC_Profile).
        return
        myThread = threading.currentThread()
        config = self.getConfig()
        testTaskArchiver = TaskArchiverPoller(config = config)
        # shall later be called directly from utils module
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)
        # will fail on calling .load() - regardless, the same except block
        numAlerts = 3
        doneList = [{'id': x} for x in range(numAlerts)]
        # final re-raise is currently commented, so don't expect Exception here
        testTaskArchiver.killSubscriptions(doneList)
        # wait for the generated alert to arrive
        while len(handler.queue) < numAlerts:
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]
        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), numAlerts)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "TaskArchiverPoller")
        return

    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False, multicore = True)
        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()
        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0, "No job should have survived")
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        workloadSummary = workdatabase.document(id = "TestWorkload")
        # Multicore-specific performance metrics from MulticoreReport.pkl.
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['minMergeTime']['average'],
                                5.7624950408900002, places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['numberOfMerges']['average'],
                                3.0, places = 2)
        self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['averageProcessTime']['average'],
                                29.369966666700002, places = 2)
        return

    # Requires a running UserFileCache to succeed. https://cmsweb.cern.ch worked for me
    # The environment variable OWNERDN needs to be set. Used to retrieve an already delegated proxy and contact the ufc
    @attr('integration')
    def testPublishJSONCreate(self):
        """
        Re-run testA_BasicFunctionTest with data in DBSBuffer
        Make sure files are generated
        """
        # Set up uploading and write them elsewhere since the test deletes them.
        self.uploadPublishInfo = True
        self.uploadPublishDir = self.testDir
        # Insert some DBSFiles
        testFileChildA = DBSBufferFile(lfn = "/this/is/a/child/lfnA", size = 1024, events = 20)
        testFileChildA.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")
        testFileChildB = DBSBufferFile(lfn = "/this/is/a/child/lfnB", size = 1024, events = 20)
        testFileChildB.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")
        testFileChildC = DBSBufferFile(lfn = "/this/is/a/child/lfnC", size = 1024, events = 20)
        testFileChildC.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                                    appFam = "RECO", psetHash = "GIBBERISH",
                                    configContent = "MOREGIBBERISH")
        testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER")
        testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER")
        testFileChildA.create()
        testFileChildB.create()
        testFileChildC.create()
        testFile = DBSBufferFile(lfn = "/this/is/a/lfn", size = 1024, events = 10)
        testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8",
                              appFam = "RECO", psetHash = "GIBBERISH",
                              configContent = "MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFile.create()
        testFileChildA.addParents([testFile["lfn"]])
        testFileChildB.addParents([testFile["lfn"]])
        testFileChildC.addParents([testFile["lfn"]])
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)
        self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")
        workflowID = self.insertWorkflow.execute(requestName='TestWorkload',
                                                 taskPath='TestWorkload/Analysis')
        myThread.dbi.processData("update dbsbuffer_file set workflow=1 where id < 4")
        # Run the test again
        self.testA_BasicFunctionTest()
        # Reset default values
        self.uploadPublishInfo = False
        self.uploadPublishDir = None
        # Make sure the files are there
        self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.json')))
        self.assertTrue(os.path.getsize(os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100)
        self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.tgz' )))
        return
class JobArchiverTest(EmulatedUnitTestCase):
    """
    TestCase for the JobArchiver component.

    Exercises JobArchiverPoller end-to-end: jobs are created in WMBS,
    driven through the state machine to 'success', then archived.  The
    test asserts that job cache directories are removed and that one
    tarball per job appears under the component's logDir.
    """
    # Upper bound on messages; kept for parity with the other component tests.
    _maxMessage = 10

    def setUp(self):
        """
        setup for test.

        Creates a fresh WMBS schema, two Couch databases (job dumps and
        framework job reports) and a work directory that is NOT deleted
        on destruction so testBasicFunctionTest can inspect its contents.
        """
        myThread = threading.currentThread()
        super(JobArchiverTest, self).setUp()
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # self.tearDown()
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        self.testInit.setupCouch("jobarchiver_t_0/jobs", "JobDump")
        self.testInit.setupCouch("jobarchiver_t_0/fwjrs", "FWJRDump")
        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daofactory(classname="Jobs.GetAllJobs")
        # deleteOnDestruction=False: the test itself checks files under testDir
        # after the poller runs, so the dir must survive until tearDown.
        self.testDir = self.testInit.generateWorkDir(deleteOnDestruction=False)
        self.nJobs = 10
        self.configFile = EmulatorSetup.setupWMAgentConfig()
        return

    def tearDown(self):
        """
        Database deletion

        Drops the WMBS tables, removes the Couch databases and the work
        directory, and deletes the emulator agent config created in setUp.
        """
        self.testInit.clearDatabase(modules=["WMCore.WMBS"])
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        EmulatorSetup.deleteConfig(self.configFile)
        super(JobArchiverTest, self).tearDown()
        return

    def getConfig(self):
        """
        _createConfig_

        General config file

        Builds the minimal agent configuration needed by JobArchiverPoller:
        General, CoreDatabase, JobStateMachine, JobArchiver and
        WorkQueueManager sections.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())
        # Both spellings are set; some components read 'WorkDir' — keep in sync.
        config.General.WorkDir = os.getenv("TESTDIR", os.getcwd())

        # Now the CoreDatabase information
        # This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv48.fnal.gov:5984")
        config.JobStateMachine.couchDBName = "jobarchiver_t_0"

        config.component_("JobArchiver")
        config.JobArchiver.pollInterval = 60
        config.JobArchiver.logLevel = 'INFO'
        # config.JobArchiver.logDir = os.path.join(self.testDir, 'logs')
        config.JobArchiver.componentDir = self.testDir
        config.JobArchiver.numberOfJobsToCluster = 1000

        config.component_('WorkQueueManager')
        config.WorkQueueManager.namespace = "WMComponent.WorkQueueManager.WorkQueueManager"
        config.WorkQueueManager.componentDir = config.General.workDir + "/WorkQueueManager"
        config.WorkQueueManager.level = 'LocalQueue'
        config.WorkQueueManager.logLevel = 'DEBUG'
        config.WorkQueueManager.couchurl = 'https://None'
        config.WorkQueueManager.dbname = 'whatever'
        config.WorkQueueManager.inboxDatabase = 'whatever2'
        config.WorkQueueManager.queueParams = {}
        config.WorkQueueManager.queueParams["ParentQueueCouchUrl"] = "https://cmsweb.cern.ch/couchdb/workqueue"

        return config

    def createTestJobGroup(self):
        """
        Creates a group of several jobs

        Builds one workflow/fileset/subscription with two input files and
        self.nJobs jobs, each holding both files.  Returns the committed
        JobGroup.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        # Bug fix: the original set testFileA's location twice and never
        # located testFileB (cf. the equivalent helper in TaskArchiverTest).
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for _ in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJobGroup.add(testJob)

        testJobGroup.commit()
        return testJobGroup

    def testBasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of
        closeouts:  drive nJobs to 'success', run the poller, then check
        that jobs ended in 'cleanout', cache dirs were removed, and each
        job's log tarball was produced and contains the job's .out file.
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        testJobGroup = self.createTestJobGroup()

        changer = ChangeState(config)

        cacheDir = os.path.join(self.testDir, 'test')
        if not os.path.isdir(cacheDir):
            os.mkdir(cacheDir)

        # if os.path.isdir(config.JobArchiver.logDir):
        #     shutil.rmtree(config.JobArchiver.logDir)

        # Mark each job successful and give it a cache dir holding one
        # <name>.out file; the archiver should later tar and remove these.
        for job in testJobGroup.jobs:
            myThread.transaction.begin()
            job["outcome"] = "success"
            job.save()
            myThread.transaction.commit()
            path = os.path.join(cacheDir, job['name'])
            os.makedirs(path)
            # Context manager guarantees the handle is closed even on error.
            with open('%s/%s.out' % (path, job['name']), 'w') as f:
                f.write(job['name'])
            job.setCache(path)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'success', 'complete')

        testJobArchiver = JobArchiverPoller(config=config)
        testJobArchiver.algorithm()

        # Every job must have reached the terminal 'cleanout' state.
        result = myThread.dbi.processData(
            "SELECT wmbs_job_state.name FROM wmbs_job_state INNER JOIN wmbs_job ON wmbs_job.state = wmbs_job_state.id")[
            0].fetchall()
        for val in result:
            self.assertEqual(val.values(), ['cleanout'])

        # The per-job cache directories must be gone.
        dirList = os.listdir(cacheDir)
        for job in testJobGroup.jobs:
            self.assertEqual(job["name"] in dirList, False)

        # logDir layout: <componentDir>/logDir/<first letter>/<workflow>/JobCluster_N
        logPath = os.path.join(config.JobArchiver.componentDir, 'logDir', 'w', 'wf001', 'JobCluster_0')
        logList = os.listdir(logPath)
        for job in testJobGroup.jobs:
            self.assertEqual('Job_%i.tar.bz2' % (job['id']) in logList, True,
                             'Could not find transferred tarball for job %i' % (job['id']))
            # Unpack the tarball into the CWD and verify its contents.
            pipe = Popen(['tar', '-jxvf', os.path.join(logPath,
                                                       'Job_%i.tar.bz2' % (job['id']))],
                         stdout=PIPE, stderr=PIPE, shell=False)
            pipe.wait()
            # filename = '%s/%s/%s.out' %(cacheDir[1:], job['name'], job['name'])
            filename = 'Job_%i/%s.out' % (job['id'], job['name'])
            self.assertEqual(os.path.isfile(filename), True,
                             'Could not find file %s' % (filename))
            with open(filename, 'r') as f:
                fileContents = f.readlines()
            self.assertEqual(fileContents[0].find(job['name']) > -1, True)
            # Clean up what we just extracted so the next iteration starts fresh.
            shutil.rmtree('Job_%i' % (job['id']))
            if os.path.isfile('Job_%i.tar.bz2' % (job['id'])):
                os.remove('Job_%i.tar.bz2' % (job['id']))

        return

    @attr('integration')
    def testSpeedTest(self):
        """
        _SpeedTest_

        Tests the components, as in sees if they load.
        Otherwise does nothing.
        """
        return
class TaskArchiverTest(unittest.TestCase): """ TestCase for TestTaskArchiver module """ _setup_done = False _teardown = False _maxMessage = 10 OWNERDN = os.environ['OWNERDN'] if 'OWNERDN' in os.environ else "Generic/OWNERDN" def setUp(self): """ setup for test. """ myThread = threading.currentThread() self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection(destroyAllDatabase = True) self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer"], useDefault = False) self.databaseName = "taskarchiver_t_0" self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary") self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump") self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump") self.testInit.setupCouch("wmagent_summary_t", "WMStats") self.testInit.setupCouch("wmagent_summary_central_t", "WMStats") self.testInit.setupCouch("stat_summary_t", "SummaryStats") reqmgrdb = "reqmgrdb_t" self.testInit.setupCouch(reqmgrdb, "ReqMgr") reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb) self.requestWriter = RequestDBWriter(reqDBURL) self.requestWriter.defaultStale = {} self.daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.getJobs = self.daofactory(classname = "Jobs.GetAllJobs") self.inject = self.daofactory(classname = "Workflow.MarkInjectedWorkflows") self.testDir = self.testInit.generateWorkDir() os.makedirs(os.path.join(self.testDir, 'specDir')) self.nJobs = 10 self.campaignName = 'aCampaign' self.uploadPublishInfo = False self.uploadPublishDir = None return def tearDown(self): """ Database deletion """ myThread = threading.currentThread() self.testInit.clearDatabase(modules = ["WMCore.WMBS"]) self.testInit.delWorkDir() self.testInit.tearDownCouch() return def getConfig(self): """ _createConfig_ 
General config file """ config = self.testInit.getConfiguration() #self.testInit.generateWorkDir(config) config.section_("General") config.General.workDir = "." config.section_("JobStateMachine") config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv52.fnal.gov:5984") config.JobStateMachine.couchDBName = self.databaseName config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t' config.JobStateMachine.summaryStatsDBName = 'stat_summary_t' config.component_("JobCreator") config.JobCreator.jobCacheDir = os.path.join(self.testDir, 'testDir') config.component_("TaskArchiver") config.TaskArchiver.componentDir = self.testDir config.TaskArchiver.WorkQueueParams = {} config.TaskArchiver.pollInterval = 60 config.TaskArchiver.logLevel = 'INFO' config.TaskArchiver.timeOut = 0 config.TaskArchiver.histogramKeys = ['AvgEventTime', 'writeTotalMB', 'jobTime'] config.TaskArchiver.histogramBins = 5 config.TaskArchiver.histogramLimit = 5 config.TaskArchiver.perfPrimaryDatasets = ['SingleMu', 'MuHad', 'MinimumBias'] config.TaskArchiver.perfDashBoardMinLumi = 50 config.TaskArchiver.perfDashBoardMaxLumi = 9000 config.TaskArchiver.dqmUrl = 'https://cmsweb.cern.ch/dqm/dev/' config.TaskArchiver.dashBoardUrl = 'http://dashboard43.cern.ch/dashboard/request.py/putluminositydata' config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName config.TaskArchiver.workloadSummaryCouchURL = config.JobStateMachine.couchurl config.TaskArchiver.requireCouch = True config.TaskArchiver.uploadPublishInfo = self.uploadPublishInfo config.TaskArchiver.uploadPublishDir = self.uploadPublishDir config.TaskArchiver.userFileCacheURL = os.getenv('UFCURL', 'http://cms-xen38.fnal.gov:7725/userfilecache/') config.TaskArchiver.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2" config.TaskArchiver.ReqMgrServiceURL = "https://cmsweb-dev.cern.ch/reqmgr/rest" config.TaskArchiver.localWMStatsURL = "%s/%s" % (config.JobStateMachine.couchurl, 
config.JobStateMachine.jobSummaryDBName) config.component_("AnalyticsDataCollector") config.AnalyticsDataCollector.centralRequestDBURL = '%s/reqmgrdb_t' % config.JobStateMachine.couchurl config.AnalyticsDataCollector.RequestCouchApp = "ReqMgr" config.section_("ACDC") config.ACDC.couchurl = config.JobStateMachine.couchurl config.ACDC.database = config.JobStateMachine.couchDBName # Make the jobCacheDir os.mkdir(config.JobCreator.jobCacheDir) # addition for Alerts messaging framework, work (alerts) and control # channel addresses to which the component will be sending alerts # these are destination addresses where AlertProcessor:Receiver listens config.section_("Alert") config.Alert.address = "tcp://127.0.0.1:5557" config.Alert.controlAddr = "tcp://127.0.0.1:5559" config.section_("BossAir") config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh' config.BossAir.gliteConf = '/afs/cern.ch/cms/LCG/LCG-2/UI/conf/glite_wms_CERN.conf' config.BossAir.credentialDir = '/home/crab/ALL_SETUP/credentials/' config.BossAir.gLiteProcesses = 2 config.BossAir.gLitePrefixEnv = "/lib64/" config.BossAir.pluginNames = ["gLitePlugin"] config.BossAir.proxyDir = "/tmp/credentials" config.BossAir.manualProxyPath = os.environ['X509_USER_PROXY'] if 'X509_USER_PROXY' in os.environ else None config.section_("Agent") config.Agent.serverDN = "/we/bypass/myproxy/logon" return config def createWorkload(self, workloadName = 'Test', emulator = True): """ _createTestWorkload_ Creates a test workload for us to run on, hold the basic necessities. 
""" workload = testWorkload("Tier1ReReco") taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest')) taskMaker.skipSubscription = True taskMaker.processWorkload() workload.setCampaign(self.campaignName) workload.save(workloadName) return workload def createTestJobGroup(self, config, name = "TestWorkthrough", filesetName = "TestFileset", specLocation = "spec.xml", error = False, task = "/TestWorkload/ReReco", type = "Processing"): """ Creates a group of several jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec = specLocation, owner = self.OWNERDN, name = name, task = task, owner_vogroup="", owner_vorole="") testWorkflow.create() self.inject.execute(names = [name], injected = True) testWMBSFileset = Fileset(name = filesetName) testWMBSFileset.create() testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10) testFileB.addRun(Run(10, *[12314])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testWMBSFileset.markOpen(0) outputWMBSFileset = Fileset(name = '%sOutput' % filesetName) outputWMBSFileset.create() testFileC = File(lfn = "/this/is/a/lfnC" , size = 1024, events = 10) testFileC.addRun(Run(10, *[12312])) testFileC.setLocation('malpaquet') testFileC.create() outputWMBSFileset.addFile(testFileC) outputWMBSFileset.commit() outputWMBSFileset.markOpen(0) testWorkflow.addOutput('output', outputWMBSFileset) testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow, type = type) testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() for i in range(0,self.nJobs): testJob = Job(name = makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 
testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313]) testJobGroup.add(testJob) testJobGroup.commit() changer = ChangeState(config) report1 = Report() report2 = Report() if error: path1 = os.path.join(WMCore.WMBase.getTestBase(), "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl") path2 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl') else: path1 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'mergeReport1.pkl') path2 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl') report1.load(filename = path1) report2.load(filename = path2) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') for i in range(self.nJobs): if i < self.nJobs/2: testJobGroup.jobs[i]['fwjr'] = report1 else: testJobGroup.jobs[i]['fwjr'] = report2 changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed') changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed') changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone') changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted') testSubscription.completeFiles([testFileA, testFileB]) return testJobGroup def createGiantJobSet(self, name, config, nSubs = 10, nJobs = 10, nFiles = 1, spec = "spec.xml"): """ Creates a massive set of jobs """ jobList = [] for i in range(0, nSubs): # Make a bunch of subscriptions localName = '%s-%i' % (name, i) testWorkflow = Workflow(spec = spec, owner = self.OWNERDN, name = localName, 
task="Test", owner_vogroup="", owner_vorole="") testWorkflow.create() testWMBSFileset = Fileset(name = localName) testWMBSFileset.create() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() filesToComplete = [] for j in range(0, nJobs): # Create jobs for each subscription testFileA = File(lfn = "%s-%i-lfnA" % (localName, j) , size = 1024, events = 10) testFileA.addRun(Run(10, *[11,12,13,14,15,16,17,18,19,20, 21,22,23,24,25,26,27,28,29,30, 31,32,33,34,35,36,37,38,39,40])) testFileA.setLocation('malpaquet') testFileA.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.commit() filesToComplete.append(testFileA) testJob = Job(name = '%s-%i' % (localName, j)) testJob.addFile(testFileA) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) jobList.append(testJob) for k in range(0, nFiles): # Create output files testFile = File(lfn = "%s-%i-output" % (localName, k) , size = 1024, events = 10) testFile.addRun(Run(10, *[12312])) testFile.setLocation('malpaquet') testFile.create() testJobGroup.output.addFile(testFile) testJobGroup.output.commit() testJobGroup.commit() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'success', 'complete') changer.propagate(testJobGroup.jobs, 'cleanout', 'success') testWMBSFileset.markOpen(0) testSubscription.completeFiles(filesToComplete) return jobList def getPerformanceFromDQM(self, dqmUrl, dataset, run): # Make function to fetch this from DQM. 
Returning Null or False if it fails getUrl = "%sjsonfairy/archive/%s%s/DQM/TimerService/event_byluminosity" % (dqmUrl, run, dataset) # Assert if the URL is assembled as expected if run == 207214: self.assertEqual('https://cmsweb.cern.ch/dqm/dev/jsonfairy/archive/207214/MinimumBias/Commissioning10-v4/DQM/DQM/TimerService/event_byluminosity', getUrl) # let's suppose it works.. testResponseFile = open(os.path.join(getTestBase(), 'WMComponent_t/TaskArchiver_t/DQMGUIResponse.json'), 'r') response = testResponseFile.read() testResponseFile.close() responseJSON = json.loads(response) return responseJSON def filterInterestingPerfPoints(self, responseJSON, minLumi, maxLumi): worthPoints = {} points = responseJSON["hist"]["bins"]["content"] for i in range(responseJSON["hist"]["xaxis"]["first"]["id"], responseJSON["hist"]["xaxis"]["last"]["id"]): # is the point worth it? if yes add to interesting points dictionary. # 1 - non 0 # 2 - between minimum and maximum expected luminosity # FIXME : 3 - population in dashboard for the bin interval < 100 # Those should come from the config : if points[i] == 0: continue binSize = responseJSON["hist"]["xaxis"]["last"]["value"]/responseJSON["hist"]["xaxis"]["last"]["id"] # Fetching the important values instLuminosity = i*binSize timePerEvent = points[i] if instLuminosity > minLumi and instLuminosity < maxLumi : worthPoints[instLuminosity] = timePerEvent return worthPoints def publishPerformanceDashBoard(self, dashBoardUrl, PD, release, worthPoints): dashboardPayload = [] for instLuminosity in worthPoints : timePerEvent = int(worthPoints[instLuminosity]) dashboardPayload.append({"primaryDataset" : PD, "release" : release, "integratedLuminosity" : instLuminosity, "timePerEvent" : timePerEvent}) data = "{\"data\":%s}" % str(dashboardPayload).replace("\'","\"") # let's suppose it works.. 
testDashBoardPayloadFile = open(os.path.join(getTestBase(), 'WMComponent_t/TaskArchiver_t/DashBoardPayload.json'), 'r') testDashBoardPayload = testDashBoardPayloadFile.read() testDashBoardPayloadFile.close() self.assertEqual(data, testDashBoardPayload) return True def populateWorkflowWithCompleteStatus(self, name ="TestWorkload"): schema = generate_reqmgr_schema(1) schema[0]["RequestName"] = name self.requestWriter.insertGenericRequest(schema[0]) result = self.requestWriter.updateRequestStatus(name, "completed") return result def testA_BasicFunctionTest(self): """ _BasicFunctionTest_ Tests the components, by seeing if they can process a simple set of closeouts """ myThread = threading.currentThread() config = self.getConfig() workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName = workloadPath) testJobGroup = self.createTestJobGroup(config = config, name = workload.name(), specLocation = workloadPath, error = False) # Create second workload testJobGroup2 = self.createTestJobGroup(config = config, name = workload.name(), filesetName = "TestFileset_2", specLocation = workloadPath, task = "/TestWorkload/ReReco/LogCollect", type = "LogCollect") cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco") os.makedirs(cachePath) self.assertTrue(os.path.exists(cachePath)) cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "LogCollect") os.makedirs(cachePath2) self.assertTrue(os.path.exists(cachePath2)) result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 2) workflowName = "TestWorkload" dbname = config.TaskArchiver.workloadSummaryCouchDBName couchdb = CouchServer(config.JobStateMachine.couchurl) workdatabase = couchdb.connectDatabase(dbname) jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName) fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName) jobs = jobdb.loadView("JobDump", 
"jobsByWorkflowName", options = {"startkey": [workflowName], "endkey": [workflowName, {}]})['rows'] fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName", options = {"startkey": [workflowName], "endkey": [workflowName, {}]})['rows'] self.assertEqual(len(jobs), 2*self.nJobs) from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase create = CreateWMBSBase() tables = [] for x in create.requiredTables: tables.append(x[2:]) self.populateWorkflowWithCompleteStatus() testTaskArchiver = TaskArchiverPoller(config = config) testTaskArchiver.algorithm() cleanCouch = CleanCouchPoller(config = config) cleanCouch.setup() cleanCouch.algorithm() result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_fileset")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall() self.assertEqual(len(result), 0) # Make sure we deleted the directory self.assertFalse(os.path.exists(cachePath)) self.assertFalse(os.path.exists(os.path.join(self.testDir, 'workloadTest/TestWorkload'))) testWMBSFileset = Fileset(id = 1) self.assertEqual(testWMBSFileset.exists(), False) workloadSummary = workdatabase.document(id = "TestWorkload") # Check ACDC self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url']) # Check the output self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO']) self.assertEqual(sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']['tasks']), ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect']) # Check performance # Check histograms 
self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['average'], 0.89405199999999996, places = 2) self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['histogram'][0]['nEvents'], 10) # Check standard performance self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['average'], 17.786300000000001, places = 2) self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['TotalJobCPU']['stdDev'], 0.0, places = 2) # Check worstOffenders self.assertEqual(workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']['AvgEventTime']['worstOffenders'], [{'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1}, {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1}, {'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2}]) # Check retryData self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10}) logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar' self.assertEqual(workloadSummary['logArchives'], {'/TestWorkload/ReReco/LogCollect' : [logCollectPFN for _ in range(10)]}) # LogCollect task is made out of identical FWJRs # assert that it is identical for x in workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys(): if x in config.TaskArchiver.histogramKeys: continue for y in ['average', 'stdDev']: self.assertAlmostEquals(workloadSummary['performance']['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y], workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'][x][y], places = 2) return def testB_testErrors(self): """ _testErrors_ 
Test with a failed FWJR """ myThread = threading.currentThread() config = self.getConfig() workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName = workloadPath) testJobGroup = self.createTestJobGroup(config = config, name = workload.name(), specLocation = workloadPath, error = True) # Create second workload testJobGroup2 = self.createTestJobGroup(config = config, name = workload.name(), filesetName = "TestFileset_2", specLocation = workloadPath, task = "/TestWorkload/ReReco/LogCollect", type = "LogCollect") cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco") os.makedirs(cachePath) self.assertTrue(os.path.exists(cachePath)) couchdb = CouchServer(config.JobStateMachine.couchurl) jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName) fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName) jobdb.loadView("JobDump", "jobsByWorkflowName", options = {"startkey": [workload.name()], "endkey": [workload.name(), {}]})['rows'] fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName", options = {"startkey": [workload.name()], "endkey": [workload.name(), {}]})['rows'] self.populateWorkflowWithCompleteStatus() testTaskArchiver = TaskArchiverPoller(config = config) testTaskArchiver.algorithm() cleanCouch = CleanCouchPoller(config = config) cleanCouch.setup() cleanCouch.algorithm() dbname = getattr(config.JobStateMachine, "couchDBName") workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname) workloadSummary = workdatabase.document(id = workload.name()) self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500) self.assertTrue('99999' in workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']) failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs'] self.assertEqual(failedRunInfo, {'10' : [[12312, 12312]]}, "Wrong lumi information in the summary for failed jobs") # Check the failures by site histograms 
self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10) self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10) self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0) self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0) return def testC_Profile(self): """ _Profile_ DON'T RUN THIS! """ return import cProfile, pstats myThread = threading.currentThread() name = makeUUID() config = self.getConfig() jobList = self.createGiantJobSet(name = name, config = config, nSubs = 10, nJobs = 1000, nFiles = 10) cleanCouch = CleanCouchPoller(config = config) cleanCouch.setup() cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename = "testStats.stat") p = pstats.Stats('testStats.stat') p.sort_stats('cumulative') p.print_stats() return def testD_Timing(self): """ _Timing_ This is to see how fast things go. 
""" return myThread = threading.currentThread() name = makeUUID() config = self.getConfig() jobList = self.createGiantJobSet(name = name, config = config, nSubs = 10, nJobs = 1000, nFiles = 10) testTaskArchiver = TaskArchiverPoller(config = config) startTime = time.time() testTaskArchiver.algorithm() stopTime = time.time() result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall() self.assertEqual(len(result), 0) testWMBSFileset = Fileset(id = 1) self.assertEqual(testWMBSFileset.exists(), False) logging.info("TaskArchiver took %f seconds" % (stopTime - startTime)) def testDQMRecoPerformanceToDashBoard(self): myThread = threading.currentThread() listRunsWorkflow = self.dbsDaoFactory(classname="ListRunsWorkflow") # Didn't like to have done that, but the test doesn't provide all info I need in the system, so faking it: myThread.dbi.processData("""insert into dbsbuffer_workflow(id, name) values (1, 'TestWorkload')""" , transaction = False) myThread.dbi.processData("""insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (1, '/store/t/e/s/t.test', 1, 1)""" , transaction = False) myThread.dbi.processData("""insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (2, '/store/t/e/s/t.test2', 1, 1)""" , transaction = False) myThread.dbi.processData("""insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207214, 100, 1)""" , transaction = False) myThread.dbi.processData("""insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207215, 200, 2)""" , transaction = False) config = self.getConfig() dqmUrl = getattr(config.TaskArchiver, "dqmUrl") perfDashBoardMinLumi = 
getattr(config.TaskArchiver, "perfDashBoardMinLumi") perfDashBoardMaxLumi = getattr(config.TaskArchiver, "perfDashBoardMaxLumi") dashBoardUrl = getattr(config.TaskArchiver, "dashBoardUrl") workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName = workloadPath) testJobGroup = self.createTestJobGroup(config = config, name = workload.name(), specLocation = workloadPath, error = True) testJobGroup2 = self.createTestJobGroup(config = config, name = workload.name(), filesetName = "TestFileset_2", specLocation = workloadPath, task = "/TestWorkload/ReReco/LogCollect", type = "LogCollect") # Adding request type as ReReco, real ReqMgr requests have it workload.data.request.section_("schema") workload.data.request.schema.RequestType = "ReReco" workload.data.request.schema.CMSSWVersion = 'test_compops_CMSSW_5_3_6_patch1' workload.getTask('ReReco').addInputDataset(primary='a',processed='b',tier='c') interestingPDs = getattr(config.TaskArchiver, "perfPrimaryDatasets") interestingDatasets = [] # Are the datasets from this request interesting? Do they have DQM output? 
One might ask afterwards if they have harvest for dataset in workload.listOutputDatasets(): (nothing, PD, procDataSet, dataTier) = dataset.split('/') if PD in interestingPDs and dataTier == "DQM": interestingDatasets.append(dataset) # We should have found 1 interesting dataset self.assertAlmostEquals(len(interestingDatasets), 1) if len(interestingDatasets) == 0 : return # Request will be only interesting for performance if it's a ReReco or PromptReco (isReReco, isPromptReco) = (False, False) if getattr(workload.data.request.schema, "RequestType", None) == 'ReReco': isReReco=True # Yes, few people like magic strings, but have a look at : # https://github.com/dmwm/T0/blob/master/src/python/T0/RunConfig/RunConfigAPI.py#L718 # Might be safe enough # FIXME: in TaskArchiver, add a test to make sure that the dataset makes sense (procDataset ~= /a/ERA-PromptReco-vVERSON/DQM) if re.search('PromptReco', workload.name()): isPromptReco = True if not (isReReco or isPromptReco): return self.assertTrue(isReReco) self.assertFalse(isPromptReco) # We are not interested if it's not a PromptReco or a ReReco if (isReReco or isPromptReco) == False: return if isReReco : release = getattr(workload.data.request.schema, "CMSSWVersion") if not release : logging.info("no release for %s, bailing out" % workload.name()) else : release = getattr(workload.tasks.Reco.steps.cmsRun1.application.setup, "cmsswVersion") if not release : logging.info("no release for %s, bailing out" % workload.name()) self.assertEqual(release, "test_compops_CMSSW_5_3_6_patch1") # If all is true, get the run numbers processed by this worklfow runList = listRunsWorkflow.execute(workflow = workload.name()) self.assertEqual([207214, 207215], runList) # GO to DQM GUI, get what you want # https://cmsweb.cern.ch/dqm/offline/jsonfairy/archive/211313/PAMuon/HIRun2013-PromptReco-v1/DQM/DQM/TimerService/event for dataset in interestingDatasets : (nothing, PD, procDataSet, dataTier) = dataset.split('/') worthPoints = {} for run in 
runList : responseJSON = self.getPerformanceFromDQM(dqmUrl, dataset, run) worthPoints.update(self.filterInterestingPerfPoints(responseJSON, perfDashBoardMinLumi, perfDashBoardMaxLumi)) # Publish dataset performance to DashBoard. if self.publishPerformanceDashBoard(dashBoardUrl, PD, release, worthPoints) == False: logging.info("something went wrong when publishing dataset %s to DashBoard" % dataset) return # Requires a running UserFileCache to succeed. https://cmsweb.cern.ch worked for me # The environment variable OWNERDN needs to be set. Used to retrieve an already delegated proxy and contact the ufc @attr('integration') def testPublishJSONCreate(self): """ Re-run testA_BasicFunctionTest with data in DBSBuffer Make sure files are generated """ # Set up uploading and write them elsewhere since the test deletes them. self.uploadPublishInfo = True self.uploadPublishDir = self.testDir # Insert some DBSFiles testFileChildA = DBSBufferFile(lfn = "/this/is/a/child/lfnA", size = 1024, events = 20) testFileChildA.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") testFileChildB = DBSBufferFile(lfn = "/this/is/a/child/lfnB", size = 1024, events = 20) testFileChildB.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") testFileChildC = DBSBufferFile(lfn = "/this/is/a/child/lfnC", size = 1024, events = 20) testFileChildC.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_2_1_8", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER") testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER") testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER") testFileChildA.create() testFileChildB.create() testFileChildC.create() testFile = DBSBufferFile(lfn = "/this/is/a/lfn", size = 1024, events = 10) testFile.setAlgorithm(appName = 
"cmsRun", appVer = "CMSSW_2_1_8", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO") testFile.create() testFileChildA.addParents([testFile["lfn"]]) testFileChildB.addParents([testFile["lfn"]]) testFileChildC.addParents([testFile["lfn"]]) myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow") workflowID = self.insertWorkflow.execute(requestName='TestWorkload', taskPath='TestWorkload/Production', blockMaxCloseTime=100, blockMaxFiles=100, blockMaxEvents=100, blockMaxSize=100) myThread.dbi.processData("update dbsbuffer_file set workflow=1 where id < 4") # Run the test again self.testA_BasicFunctionTest() # Reset default values self.uploadPublishInfo = False self.uploadPublishDir = None # Make sure the files are there self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.json'))) self.assertTrue(os.path.getsize(os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100) self.assertTrue(os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.tgz' ))) return
class JobCreatorTest(unittest.TestCase):
    """
    Test case for the JobCreator component.

    Exercises JobCreatorPoller against a real WMBS schema plus Couch job/fwjr
    dumps: job collections are injected into WMBS, the poller is run, and the
    created jobs / pickled job files are checked on disk and in the database.
    """

    # Sites registered in WMBS locations and ResourceControl during setUp.
    sites = ['T2_US_Florida', 'T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN']

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        WMBS tables.  Also, create some dummy locations.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        #self.tearDown()
        self.testInit.setSchema(customModules=[
            'WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'
        ],
                                useDefault=False)
        self.couchdbname = "jobcreator_t"
        self.testInit.setupCouch("%s/jobs" % self.couchdbname, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.couchdbname, "FWJRDump")
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        locationAction = self.daoFactory(classname="Locations.New")
        for site in self.sites:
            locationAction.execute(siteName=site, seName=site)

        #Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, seName=site, ceName=site)
            resourceControl.insertThreshold(siteName = site, taskType = 'Processing', \
                                            maxSlots = 10000, pendingSlots = 10000)

        self.resourceControl = resourceControl

        self._setup = True
        self._teardown = False

        self.testDir = self.testInit.generateWorkDir()
        self.cwd = os.getcwd()

        # Set heartbeat
        self.componentName = 'JobCreator'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables, remove the working directory and the Couch
        databases, and delete the emulator config file created in setUp.
        """
        myThread = threading.currentThread()

        self.testInit.clearDatabase(modules=[
            'WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'
        ])

        self.testInit.delWorkDir()

        self._teardown = True

        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
        """
        _createJobCollection_

        Create a collection of jobs: one Workflow plus nSubs subscriptions,
        each with its own fileset of nFiles files spread over random sites.
        """
        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                name=name, task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):

            nameStr = '%s-%i' % (name, sub)

            myThread.transaction.begin()

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # pick a random site
                site = random.choice(self.sites)
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                                size=1024, events=10)
                testFile.setLocation(site)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            myThread.transaction.commit()

        return

    def createWorkload(self, workloadName='Test', emulator=True, priority=1):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        A PresetSeeder generator is attached so the created jobs carry fixed
        seeds (checked in testA).
        """
        workload = testWorkload("Tier1ReReco")
        rereco = workload.getTask("ReReco")
        seederDict = {
            "generator.initialSeed": 1001,
            "evtgenproducer.initialSeed": 1001
        }
        rereco.addGenerator("PresetSeeder", **seederDict)

        taskMaker = TaskMaker(workload,
                              os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        return workload

    def getConfig(self):
        """
        _getConfig_

        Creates a common config for the JobCreator component.
        """
        myThread = threading.currentThread()

        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        #First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        config.section_("Agent")
        config.Agent.componentName = self.componentName

        #Now the CoreDatabase information
        #This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.component_("JobCreator")
        config.JobCreator.namespace = 'WMComponent.JobCreator.JobCreator'
        #The log level of the component.
        #config.JobCreator.logLevel = 'SQLDEBUG'
        config.JobCreator.logLevel = 'INFO'
        # maximum number of threads we want to deal
        # with messages per pool.
        config.JobCreator.maxThreads = 1
        config.JobCreator.UpdateFromResourceControl = True
        config.JobCreator.pollInterval = 10
        #config.JobCreator.jobCacheDir = self.testDir
        config.JobCreator.defaultJobType = 'processing'  #Type of jobs that we run, used for resource control
        config.JobCreator.workerThreads = 4
        config.JobCreator.componentDir = self.testDir
        config.JobCreator.useWorkQueue = True
        config.JobCreator.WorkQueueParams = {'emulateDBSReader': True}

        # We now call the JobMaker from here
        config.component_('JobMaker')
        config.JobMaker.logLevel = 'INFO'
        config.JobMaker.namespace = 'WMCore.WMSpec.Makers.JobMaker'
        config.JobMaker.maxThreads = 1
        config.JobMaker.makeJobsHandler = 'WMCore.WMSpec.Makers.Handlers.MakeJobs'

        #JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL',
                                                    'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = self.couchdbname

        return config

    def testA_VerySimpleTest(self):
        """
        _VerySimpleTest_

        Just test that everything works...more or less
        """
        #return
        myThread = threading.currentThread()
        config = self.getConfig()
        name = makeUUID()
        nSubs = 5
        nFiles = 10
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles,
                                 workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        # One job per injected file is expected.
        self.assertEqual(len(result), nSubs * nFiles)

        # Count database objects
        result = myThread.dbi.processData(
            'SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'jobCacheDir',
                                     'TestWorkload', 'ReReco')
        # It should have at least one jobGroup
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        # But no more then twenty
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        listOfDirs = []
        for tmpDirectory in os.listdir(testDirectory):
            listOfDirs.extend(
                os.listdir(os.path.join(testDirectory, tmpDirectory)))
        self.assertTrue('job_1' in listOfDirs)
        self.assertTrue('job_2' in listOfDirs)
        self.assertTrue('job_3' in listOfDirs)

        # Load one pickled job off disk and check its baggage/seeds.
        jobDir = os.listdir(groupDirectory)[0]
        jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        f = open(jobFile, 'r')
        job = cPickle.load(f)
        f.close()

        # Seeds injected by the PresetSeeder generator in createWorkload.
        self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
        self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed,
                         1001)

        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']),
                         'TestWorkload-Sandbox.tar.bz2')

        return

    @attr('performance')
    def testB_ProfilePoller(self):
        """
        Profile your performance

        You shouldn't be running this normally because it doesn't do anything.
        NOTE: the early ``return`` below deliberately disables the test body.
        """
        return
        myThread = threading.currentThread()
        name = makeUUID()
        nSubs = 5
        nFiles = 1500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')
        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles,
                                 workflowURL=workloadPath)
        config = self.getConfig()

        testJobCreator = JobCreatorPoller(config=config)
        cProfile.runctx("testJobCreator.algorithm()", globals(), locals(),
                        filename="testStats.stat")

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        time.sleep(10)

        self.assertEqual(len(result), nSubs * nFiles)

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    def testC_ProfileWorker(self):
        """
        Profile where the work actually gets done

        You shouldn't be running this one either, since it doesn't test
        anything.  NOTE: disabled via the early ``return`` below.
        """
        return
        myThread = threading.currentThread()
        name = makeUUID()
        nSubs = 5
        nFiles = 500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')
        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles,
                                 workflowURL=workloadPath)
        config = self.getConfig()

        configDict = {
            "couchURL": config.JobStateMachine.couchurl,
            "couchDBName": config.JobStateMachine.couchDBName,
            'jobCacheDir': config.JobCreator.jobCacheDir,
            'defaultJobType': config.JobCreator.defaultJobType
        }

        # NOTE(review): ``input`` shadows the builtin; harmless here since the
        # code is unreachable (early return above).
        input = [{
            "subscription": 1
        }, {
            "subscription": 2
        }, {
            "subscription": 3
        }, {
            "subscription": 4
        }, {
            "subscription": 5
        }]

        testJobCreator = JobCreatorPoller(**configDict)
        cProfile.runctx("testJobCreator.algorithm(parameters = input)",
                        globals(), locals(), filename="workStats.stat")

        p = pstats.Stats('workStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    def testD_HugeTest(self):
        """
        Don't run this one either

        Large-scale smoke test (10 subs x 5000 files); disabled via the early
        ``return`` below.
        """
        return
        myThread = threading.currentThread()
        config = self.getConfig()

        name = makeUUID()
        nSubs = 10
        nFiles = 5000
        workloadName = 'Tier1ReReco'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles,
                                 workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        startTime = time.time()
        testJobCreator.algorithm()
        stopTime = time.time()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nFiles)

        print("Job took %f seconds to run" % (stopTime - startTime))

        # Count database objects
        result = myThread.dbi.processData(
            'SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        return

    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group
        is also injected.  Also files are added to the "Mergeable"
        subscription as well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", seName="somese.cern.ch")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                 name=name, task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        # NOTE(review): bogusSubscription is never create()d, so it is never
        # persisted to WMBS — confirm whether that is intentional.
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")

        # Run 1: four contiguous 1024-event files.
        file1 = File(lfn="file1", size=1024, events=1024, first_event=0,
                     locations=set(["somese.cern.ch"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2", size=1024, events=1024, first_event=1024,
                     locations=set(["somese.cern.ch"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3", size=1024, events=1024, first_event=2048,
                     locations=set(["somese.cern.ch"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4", size=1024, events=1024, first_event=3072,
                     locations=set(["somese.cern.ch"]))
        file4.addRun(Run(1, *[45]))
        file4.create()

        # Run 1, lumi 46: three more files.
        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                     locations=set(["somese.cern.ch"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024,
                     locations=set(["somese.cern.ch"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048,
                     locations=set(["somese.cern.ch"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        # Run 2 files; fileIII is oversized in events, fileIV in bytes.
        fileI = File(lfn="fileI", size=1024, events=1024, first_event=0,
                     locations=set(["somese.cern.ch"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024,
                      locations=set(["somese.cern.ch"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII", size=1024, events=102400,
                       first_event=2048, locations=set(["somese.cern.ch"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV", size=102400, events=1024,
                      first_event=3072, locations=set(["somese.cern.ch"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        # Every file goes into both filesets.
        for file in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                fileII, fileIII, fileIV
        ]:
            mergeFileset.addFile(file)
            bogusFileset.addFile(file)

        mergeFileset.commit()
        bogusFileset.commit()

        return

    def testE_TestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Test and see if we can split things without a proxy.
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name = makeUUID()
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)

        # Change the file splitting algo
        procTask = workload.getTask("ReReco")
        procTask.setSplittingAlgorithm("ParentlessMergeBySize",
                                       min_merge_size=1,
                                       max_merge_size=100000,
                                       max_merge_events=200000)

        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.stuffWMBS(workflowURL=workloadPath, name=name)

        testJobCreator = JobCreatorPoller(config=config)

        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        # All mergeable files should collapse into a single processing job.
        self.assertEqual(len(result), 1)

        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)

        return
class URLFetcherTest(unittest.TestCase):
    """
    Main test for the URLFetcher

    Verifies that CMSSWFetcher can pull a cached configuration out of the
    config cache and materialize it into the step directories on disk.
    """

    def setUp(self):
        """
        Prepare logging, a database connection, the config-cache couch
        database and a scratch working directory.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        Remove the scratch directory and tear down the couch database.
        """
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _getConfig_

        Create a test config and put it in the cache
        """
        tweaks = {'someKey': "Hello, I am a PSetTweak. It's nice to meet you."}

        cache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        cache.createUserGroup(groupname="testGroup", username='******')
        cache.setPSetTweaks(PSetTweak=tweaks)
        cache.attachments['configFile'] = 'This Is A Test Config'
        cache.save()
        return cache

    def createTask(self, configCache):
        """
        _createTask_

        Create a test task that includes the fileURL
        """
        task = WMTask.makeWMTask("testTask")
        task.makeStep("step1")
        task.makeStep("step2")

        for node in task.steps().nodeIterator():
            helper = WMStep.WMStepHelper(node)
            # One on-disk directory per step, matching the step name.
            os.mkdir(os.path.join(self.testDir, helper.name()))
            helper.setStepType("CMSSW")
            helper.data.application.section_('command')
            # Wire the step up to the config-cache document.
            helper.data.application.configuration.configCacheUrl = configCache.dburl
            helper.data.application.configuration.cacheName = configCache.dbname
            helper.data.application.configuration.configId = configCache.getCouchID()
            helper.data.application.command.psetTweak = 'tweak'
            helper.data.application.command.configuration = 'configCache.file'

        return task

    def testA_BasicFunction(self):
        """
        _BasicFunction_

        Run a test to find out if we can grab a configCache
        """
        configCache = self.getConfig()
        task = self.createTask(configCache=configCache)

        fetcher = CMSSWFetcher()
        fetcher.setWorkingDirectory(workingDir=self.testDir)
        fetcher(wmTask=task)

        # The tweak and the attached config must land in the step directory.
        self.assertTrue(
            os.path.isfile(os.path.join(self.testDir, 'step2', 'tweak')))

        with open(os.path.join(self.testDir, 'step2',
                               'configCache.file')) as handle:
            content = handle.read()
        self.assertEqual(content, 'This Is A Test Config')
        return
class CouchappTest(unittest.TestCase):
    # Tests for the JobDump couchapp views, driven by real WMBS job-state
    # transitions pushed through ChangeState.
    # NOTE(review): a second class with this same name is defined later in
    # this file and shadows this one at import time — these tests never run
    # unless one of the two is renamed or removed.

    def setUp(self):
        """
        Create the WMBS schema, the couch databases (WorkloadSummary,
        JobDump, FWJRDump), a ChangeState instance and a scratch work dir.
        """
        myThread = threading.currentThread()
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        self.databaseName = "couchapp_t_0"
        self.testInit.setupCouch(self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")

        # Setup config for couch connections
        config = self.testInit.getConfiguration()
        config.section_("JobStateMachine")
        # NOTE(review): unlike the duplicate CouchappTest further down,
        # couchurl is never set here — presumably getConfiguration() already
        # provides it; verify against TestInit.
        config.JobStateMachine.couchDBName = self.databaseName

        # Create couch server and connect to databases
        self.couchdb = CouchServer(config.JobStateMachine.couchurl)
        self.jobsdatabase = self.couchdb.connectDatabase(
            "%s/jobs" % config.JobStateMachine.couchDBName)
        self.fwjrdatabase = self.couchdb.connectDatabase(
            "%s/fwjrs" % config.JobStateMachine.couchDBName)

        # Create changeState
        self.changeState = ChangeState(config)
        self.config = config

        # Create testDir
        self.testDir = self.testInit.generateWorkDir()

        return

    def tearDown(self):
        """
        Clear the WMBS tables and remove the work dir.  The couch teardown
        is deliberately commented out (databases kept for inspection).
        """
        self.testInit.clearDatabase(modules = ["WMCore.WMBS"])
        self.testInit.delWorkDir()
        #self.testInit.tearDownCouch()
        return

    def createWorkload(self, workloadName = 'Test', emulator = True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload("Tier1ReReco")

        taskMaker = TaskMaker(workload,
                              os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.save(workloadName)

        return workload

    def createTestJobGroup(self, name = "TestWorkthrough",
                           specLocation = "spec.xml", error = False,
                           task = "/TestWorkload/ReReco",
                           nJobs = 10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs, then walk every job
        through the full state machine to 'cleanout' so the couchapp views
        see a complete job history.
        """
        myThread = threading.currentThread()

        testWorkflow = Workflow(spec = specLocation, owner = "Simon",
                                name = name, task = task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name = name)
        testWMBSFileset.create()

        testFileA = File(lfn = makeUUID(), size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = makeUUID(), size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Every job reads both files and carries a retry history plus a mask.
        for i in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run = 10, lumis = [12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Load a canned framework job report (bad or good) from the test
        # fixtures depending on the requested error flag.
        report = Report()
        if error:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "badBackfillJobReport.pkl")
        else:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "PerformanceReport2.pkl")
        report.load(filename = path)

        # Drive the jobs through the state machine; fwjr attached before the
        # failure transitions so it is recorded in couch.
        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup

    def testHighestJobID(self):
        """
        _highestJobID_

        This is a jobDump function that should tell us the highest jobID
        currently being stored in the couch DB.
        """
        workloadPath = os.path.join(self.testDir, 'spec.pkl')
        workload = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(name = workload.name(),
                                               specLocation = workloadPath,
                                               error = False,
                                               nJobs = 10)

        # Job IDs are zero-based, so ten jobs end at ID 9.
        jobID = self.jobsdatabase.loadView("JobDump",
                                           "highestJobID")['rows'][0]['value']
        self.assertEqual(jobID, 9)

        testJobGroup2 = self.createTestJobGroup(name = workload.name(),
                                                specLocation = workloadPath,
                                                error = False,
                                                nJobs = 10)

        # Ten more jobs push the highest ID to 19.
        jobID = self.jobsdatabase.loadView("JobDump",
                                           "highestJobID")['rows'][0]['value']
        self.assertEqual(jobID, 19)

        return
class CouchappTest(unittest.TestCase):
    # Tests for the JobDump couchapp views, driven by real WMBS job-state
    # transitions pushed through ChangeState.
    # NOTE(review): this class redefines the CouchappTest declared earlier in
    # this file; at import time only this definition survives.  The two are
    # near-identical — the earlier copy should probably be deleted.

    def setUp(self):
        """
        Create the WMBS schema, the couch databases (WorkloadSummary,
        JobDump, FWJRDump), a ChangeState instance and a scratch work dir.
        """
        myThread = threading.currentThread()
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        self.databaseName = "couchapp_t_0"
        self.testInit.setupCouch(self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump")

        # Setup config for couch connections
        config = self.testInit.getConfiguration()
        config.section_("JobStateMachine")
        # Fall back to a hard-coded couch host when COUCHURL is unset.
        config.JobStateMachine.couchurl = os.getenv("COUCHURL",
                                                    "cmssrv52.fnal.gov:5984")
        config.JobStateMachine.couchDBName = self.databaseName

        # Create couch server and connect to databases
        self.couchdb = CouchServer(config.JobStateMachine.couchurl)
        self.jobsdatabase = self.couchdb.connectDatabase(
            "%s/jobs" % config.JobStateMachine.couchDBName)
        self.fwjrdatabase = self.couchdb.connectDatabase(
            "%s/fwjrs" % config.JobStateMachine.couchDBName)

        # Create changeState
        self.changeState = ChangeState(config)
        self.config = config

        # Create testDir
        self.testDir = self.testInit.generateWorkDir()

        return

    def tearDown(self):
        """
        Clear the WMBS tables and remove the work dir.  The couch teardown
        is deliberately commented out (databases kept for inspection).
        """
        self.testInit.clearDatabase(modules=["WMCore.WMBS"])
        self.testInit.delWorkDir()
        #self.testInit.tearDownCouch()
        return

    def createWorkload(self, workloadName='Test', emulator=True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload("Tier1ReReco")

        taskMaker = TaskMaker(workload,
                              os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.save(workloadName)

        return workload

    def createTestJobGroup(self, name="TestWorkthrough",
                           specLocation="spec.xml", error=False,
                           task="/TestWorkload/ReReco",
                           nJobs=10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs, then walk every job
        through the full state machine to 'cleanout' so the couchapp views
        see a complete job history.
        """
        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation, owner="Simon",
                                name=name, task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn=makeUUID(), size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn=makeUUID(), size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Every job reads both files and carries a retry history plus a mask.
        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Load a canned framework job report (bad or good) from the test
        # fixtures depending on the requested error flag.
        report = Report()
        if error:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "badBackfillJobReport.pkl")
        else:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "PerformanceReport2.pkl")
        report.load(filename=path)

        # Drive the jobs through the state machine; fwjr attached before the
        # failure transitions so it is recorded in couch.
        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup

    def testHighestJobID(self):
        """
        _highestJobID_

        This is a jobDump function that should tell us the highest jobID
        currently being stored in the couch DB.
        """
        workloadPath = os.path.join(self.testDir, 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False,
                                               nJobs=10)

        # Job IDs are zero-based, so ten jobs end at ID 9.
        jobID = self.jobsdatabase.loadView("JobDump",
                                           "highestJobID")['rows'][0]['value']
        self.assertEqual(jobID, 9)

        testJobGroup2 = self.createTestJobGroup(name=workload.name(),
                                                specLocation=workloadPath,
                                                error=False,
                                                nJobs=10)

        # Ten more jobs push the highest ID to 19.
        jobID = self.jobsdatabase.loadView("JobDump",
                                           "highestJobID")['rows'][0]['value']
        self.assertEqual(jobID, 19)

        return
class ReportTest(unittest.TestCase):
    """
    _ReportTest_

    Unit tests for the Report class.
    """

    def setUp(self):
        """
        _setUp_

        Figure out the location of the XML report produced by CMSSW.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase = True)
        self.testInit.setupCouch("report_t/fwjrs", "FWJRDump")

        # Canned CMSSW framework job reports used by the tests below.
        self.xmlPath = os.path.join(getTestBase(),
                                    "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        self.badxmlPath = os.path.join(getTestBase(),
                                       "WMCore_t/FwkJobReport_t/CMSSWFailReport2.xml")
        self.skippedFilesxmlPath = os.path.join(getTestBase(),
                                                "WMCore_t/FwkJobReport_t/CMSSWSkippedNonExistentFile.xml")
        self.skippedAllFilesxmlPath = os.path.join(getTestBase(),
                                                   "WMCore_t/FwkJobReport_t/CMSSWSkippedAll.xml")
        self.fallbackXmlPath = os.path.join(getTestBase(),
                                            "WMCore_t/FwkJobReport_t/CMSSWInputFallback.xml")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        _tearDown_

        Cleanup the databases.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def verifyInputData(self, report):
        """
        _verifyInputData_

        Verify that the input file in the Report class matches the input file
        in the XML generated by CMSSW.
        """
        inputFiles = report.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 1, \
            "Error: Wrong number of input files."
        assert inputFiles[0]["lfn"] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong LFN on input file."
        assert inputFiles[0]["pfn"] == "dcap://cmsdca.fnal.gov:24137/pnfs/fnal.gov/usr/cms/WAX/11/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong PFN on input file."

        inputRun = list(inputFiles[0]["runs"])
        assert len(inputRun) == 1, \
            "Error: Wrong number of runs in input."
        assert inputRun[0].run == 122023, \
            "Error: Wrong run number on input file."
        assert len(inputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in input file."
        assert 215 in inputRun[0].lumis, \
            "Error: Input file is missing lumis."

        assert inputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in input file."
        assert inputFiles[0]["size"] == 0, \
            "Error: Wrong size in input file."
        assert inputFiles[0]["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
            "Error: Catalog on input file is wrong."
        assert inputFiles[0]["guid"] == "142F3F42-C5D6-DE11-945D-000423D94494", \
            "Error: GUID of input file is wrong."
        return

    def verifyRecoOutput(self, report):
        """
        _verifyRecoOutput_

        Verify that all the metadata in the RECO output module is correct.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1", "outputRECORECO")

        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputRECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."
        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."
        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "7E3359C8-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputRECORECO", \
            "Error: Module label of output file is wrong."
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def verifyAlcaOutput(self, report):
        """
        _verifyAlcaOutput_

        Verify that all of the meta data in the ALCARECO output module is
        correct.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1", "outputALCARECORECO")

        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/ALCARECO/rereco_GR09_R_34X_V5_All_v1/0000/B8F849C9-222E-DF11-B2B0-001731230E47.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputALCARECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."
        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."
        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "B8F849C9-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputALCARECORECO", \
            "Error: Module label of output file is wrong."
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def testXMLParsing(self):
        """
        _testParsing_

        Verify that the parsing of a CMSSW XML report works correctly.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        self.verifyInputData(myReport)
        self.verifyRecoOutput(myReport)
        self.verifyAlcaOutput(myReport)
        return

    def testBadXMLParsing(self):
        """
        _testBadXMLParsing_

        Verify that the parsing of a CMSSW XML report works correctly even if
        the XML is malformed.

        This should raise a FwkJobReportException, which in CMSSW will be
        caught.
        """
        myReport = Report("cmsRun1")

        from WMCore.FwkJobReport.Report import FwkJobReportException
        self.assertRaises(FwkJobReportException, myReport.parse, self.badxmlPath)
        # 50115 is the WMCore exit code for an unparseable FWJR.
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].type, 'BadFWJRXML')
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].exitCode, 50115)
        return

    def testErrorReporting(self):
        """
        _testErrorReporting_

        Verify that errors are correctly transfered from the XML report to the
        python report.
        """
        # NOTE(review): this literal was reconstructed from a collapsed
        # source line; verify the exact line breaks against the original file.
        cmsException = \
"""cms::Exception caught in cmsRun
---- EventProcessorFailure BEGIN
EventProcessingStopped
---- ScheduleExecutionFailure BEGIN
ProcessingStopped
---- NoRecord BEGIN
No "CastorDbRecord" record found in the EventSetup.
 Please add an ESSource or ESProducer that delivers such a record.
cms::Exception going through module CastorRawToDigi/castorDigis
 run: 121849 lumi: 1 event: 23
---- NoRecord END
Exception going through path raw2digi_step
---- ScheduleExecutionFailure END
an exception occurred during current event processing
cms::Exception caught in EventProcessor and rethrown
---- EventProcessorFailure END"""

        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWFailReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1, "errors"), \
            "Error: Error section missing."
        assert getattr(myReport.data.cmsRun1.errors, "errorCount") == 1, \
            "Error: Error count is wrong."
        assert hasattr(myReport.data.cmsRun1.errors, "error0"), \
            "Error: Error0 section is missing."
        assert myReport.data.cmsRun1.errors.error0.type == "CMSException", \
            "Error: Wrong error type."
        assert myReport.data.cmsRun1.errors.error0.exitCode == "8001", \
            "Error: Wrong exit code."
        assert myReport.data.cmsRun1.errors.error0.details == cmsException, \
            "Error: Error details are wrong:\n|%s|\n|%s|" % (myReport.data.cmsRun1.errors.error0.details,
                                                             cmsException)

        # Test getStepErrors
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].type, "CMSException")
        return

    def testMultipleInputs(self):
        """
        _testMultipleInputs_

        Verify that parsing XML reports with multiple inputs works correctly.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWMultipleInput.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1.input, "source"), \
            "Error: Report missing input source."

        inputFiles = myReport.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 2, \
            "Error: Wrong number of input files."

        for inputFile in inputFiles:
            assert inputFile["input_type"] == "primaryFiles", \
                "Error: Wrong input type."
            assert inputFile["module_label"] == "source", \
                "Error: Module label is wrong"
            assert inputFile["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
                "Error: Catalog is wrong."
            assert inputFile["events"] == 2, \
                "Error: Wrong number of events."
            assert inputFile["input_source_class"] == "PoolSource", \
                "Error: Wrong input source class."

            # The two input files differ only by GUID/LFN/PFN and lumis.
            if inputFile["guid"] == "F0875ECD-3347-DF11-9FE0-003048678A80":
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 1 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
            else:
                assert inputFile["guid"] == "626D74CE-3347-DF11-9363-0030486790C0", \
                    "Error: Wrong guid."
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 2 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
        return

    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        jsonReport = myReport.__to_json__(None)

        # FIX: membership tests used ".keys()"; testing against the dict
        # directly is equivalent and idiomatic on both Python 2 and 3.
        assert "task" in jsonReport, \
            "Error: Task name missing from report."

        assert len(jsonReport["steps"]) == 1, \
            "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"], \
            "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        jsonReportSections = ["status", "errors", "logs", "parameters", "site",
                              "analysis", "cleanup", "input", "output", "start"]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep, \
                "Error: missing section: %s" % jsonReportSection

        return

    def testTimeSetting(self):
        """
        _testTimeSetting_

        Can we set the times correctly?
        """
        stepName = "cmsRun1"
        timeDiff = 0.01
        myReport = Report(stepName)
        localTime = time.time()
        myReport.setStepStartTime(stepName)
        myReport.setStepStopTime(stepName)
        repTime = myReport.getTimes(stepName)

        self.assertTrue(repTime["startTime"] - localTime < timeDiff)
        self.assertTrue(repTime["stopTime"] - localTime < timeDiff)

        # With several steps, getFirstStartLastStop() should span from the
        # earliest start to the latest stop.
        myReport = Report("cmsRun1")
        myReport.addStep("cmsRun2")
        myReport.addStep("cmsRun3")

        step = myReport.retrieveStep("cmsRun1")
        step.startTime = 1
        step.stopTime = 8
        step = myReport.retrieveStep("cmsRun2")
        step.startTime = 2
        step.stopTime = 9
        step = myReport.retrieveStep("cmsRun3")
        step.startTime = 3
        step.stopTime = 10

        self.assertEqual(myReport.getFirstStartLastStop()['stopTime'], 10)
        self.assertEqual(myReport.getFirstStartLastStop()['startTime'], 1)

        return

    def testTaskJobID(self):
        """
        _testTaskJobID_

        Test the basic task and jobID functions.
        """
        report = Report('fake')
        self.assertEqual(report.getTaskName(), None)
        self.assertEqual(report.getJobID(), None)
        report.setTaskName('silly')
        report.setJobID(100)
        self.assertEqual(report.getTaskName(), 'silly')
        self.assertEqual(report.getJobID(), 100)
        return

    def test_PerformanceReport(self):
        """
        _PerformanceReport_

        Test the performance report part of the job report.
        """
        report = Report("cmsRun1")
        report.setStepVSize(stepName = "cmsRun1", min = 100, max = 800, average = 244)
        report.setStepRSS(stepName = "cmsRun1", min = 100, max = 800, average = 244)
        report.setStepPCPU(stepName = "cmsRun1", min = 100, max = 800, average = 244)
        report.setStepPMEM(stepName = "cmsRun1", min = 100, max = 800, average = 244)

        # Every performance section (VSize, RSS, PCPU, PMEM) was given the
        # same values, so they should all read back identically.
        perf = report.retrieveStep("cmsRun1").performance
        for section in perf.dictionary_().values():
            d = section.dictionary_()
            self.assertEqual(d['min'], 100)
            self.assertEqual(d['max'], 800)
            self.assertEqual(d['average'], 244)
        return

    def testPerformanceSummary(self):
        """
        _testPerformanceSummary_

        Test whether or not we can pull performance information out of a
        Timing/SimpleMemoryCheck jobReport.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        # Do a brief check of the three sections
        perf = myReport.data.cmsRun1.performance

        self.assertEqual(perf.memory.PeakValueRss, '492.293')
        self.assertEqual(perf.cpu.TotalJobCPU, '9.16361')
        self.assertEqual(perf.storage.writeTotalMB, 5.22226)
        self.assertEqual(perf.storage.writeTotalSecs, 60317.4)
        self.assertEqual(perf.storage.readPercentageOps, 0.98585512216030857)

        return

    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly
        converted to JSON.
        """
        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(thunker = None)["steps"]["cmsRun1"]["performance"]

        # FIX: dict.has_key() was removed in Python 3; the "in" operator is
        # the portable, idiomatic membership test and works on Python 2 too.
        self.assertTrue("storage" in perfSection,
                        "Error: Storage section is missing.")
        self.assertTrue("memory" in perfSection,
                        "Error: Memory section is missing.")
        self.assertTrue("cpu" in perfSection,
                        "Error: CPU section is missing.")

        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return

    def testExitCode(self):
        """
        _testExitCode_

        Test and see if we can get an exit code out of a report.

        Note: Errors without a return code return 99999.
        """
        report = Report("cmsRun1")
        self.assertEqual(report.getExitCode(), 0)
        report.addError(stepName = "cmsRun1", exitCode = None,
                        errorType = "test", errorDetails = "test")
        # An error without an exit code maps to the generic 99999.
        self.assertEqual(report.getExitCode(), 99999)
        self.assertEqual(report.getStepExitCode(stepName = "cmsRun1"), 99999)
        report.addError(stepName = "cmsRun1", exitCode = '12345',
                        errorType = "test", errorDetails = "test")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName = "cmsRun1"), 12345)

    def testProperties(self):
        """
        _testProperties_

        Test data fields for the properties information for DBS.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        name = "ThisIsASillyString"

        myReport.setValidStatus(name)
        myReport.setGlobalTag(name)
        myReport.setAcquisitionProcessing(acquisitionEra = 'NULL', processingVer = name)
        myReport.setInputDataset(inputPath = '/lame/path')

        # The properties should be stamped onto every file in the step.
        for f in myReport.getAllFilesFromStep("cmsRun1"):
            self.assertEqual(f['globalTag'], name)
            self.assertEqual(f['validStatus'], name)
            self.assertEqual(f['processingVer'], name)
            self.assertEqual(f['acquisitionEra'], 'NULL')
            self.assertEqual(f['inputPath'], '/lame/path')
        return

    def testOutputFiles(self):
        """
        _testOutputFiles_

        Test some basic manipulation of output files.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        files = myReport.getAllFilesFromStep(step = "cmsRun1")

        f1 = files[0]
        f2 = files[1]

        self.assertEqual(f1['outputModule'], 'outputRECORECO')
        self.assertEqual(f1['pfn'], 'outputRECORECO.root')
        self.assertEqual(f2['outputModule'], 'outputALCARECORECO')
        self.assertEqual(f2['pfn'], 'outputALCARECORECO.root')

        for f in files:
            self.assertEqual(f['events'], 2)
            self.assertEqual(f['configURL'], None)
            self.assertEqual(f['merged'], False)
            self.assertEqual(f['validStatus'], None)
            self.assertEqual(f['first_event'], 0)
        return

    def testGetAdlerChecksum(self):
        """
        _testGetAdlerChecksum_

        Test the function that sees if all files have an adler checksum.

        For some reason, our default XML report doesn't have checksums.
        Therefore it should fail.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # 60451 is the exit code for a missing/invalid adler32 checksum.
        myReport.checkForAdlerChecksum(stepName = "cmsRun1")
        self.assertFalse(myReport.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport.getExitCode(), 60451)

        # Now see what happens if the adler32 is set to None
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': None}
        myReport2.checkForAdlerChecksum(stepName = "cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60451)

        # Any non-None adler32 value should pass the check.
        myReport3 = Report("cmsRun1")
        myReport3.parse(self.xmlPath)
        fRefs = myReport3.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': 100}
        myReport3.checkForAdlerChecksum(stepName = "cmsRun1")
        self.assertTrue(myReport3.getExitCode() != 60451)
        return

    def testCheckLumiInformation(self):
        """
        _testCheckLumiInformation_

        Test the function that checks if all files have run/lumi information.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # 60452 is the exit code for missing run/lumi information.
        myReport.checkForRunLumiInformation(stepName = "cmsRun1")
        self.assertNotEqual(myReport.getExitCode(), 60452)

        # Remove the lumi information on purpose
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step = "cmsRun1")
        for fRef in fRefs:
            fRef.runs = ConfigSection()
        myReport2.checkForRunLumiInformation(stepName = "cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60452)
        return

    def testTaskSuccessful(self):
        """
        _testTaskSuccessful_

        Test whether or not the report marks the task successful.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # First, the report should fail
        self.assertFalse(myReport.taskSuccessful())

        # Second, if we ignore cmsRun, the task
        # should succeed
        self.assertTrue(myReport.taskSuccessful(ignoreString = 'cmsRun'))
        return

    def testMultiCoreReport(self):
        """
        _testMultiCoreReport_

        Verify that multicore reports can be json encoded and uploaded to
        couch.
        """
        couchdb = CouchServer(os.environ["COUCHURL"])
        fwjrdatabase = couchdb.connectDatabase("report_t/fwjrs")

        self.mcPath = os.path.join(getTestBase(),
                                   "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
        myReport = Report()
        myReport.unpersist(self.mcPath)

        fwjrDocument = {"_id": "303-0",
                        "jobid": 303,
                        "retrycount": 0,
                        "fwjr": myReport.__to_json__(None),
                        "type": "fwjr"}

        fwjrdatabase.queue(fwjrDocument, timestamp = True)
        fwjrdatabase.commit()
        return

    def testStripReport(self):
        """
        _testStripReport_

        Test whether or not we can strip input file information from a FWJR
        and create a smaller object.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        path1 = os.path.join(self.testDir, 'testReport1.pkl')
        path2 = os.path.join(self.testDir, 'testReport2.pkl')

        myReport.save(path1)
        # The expected sizes below pin the pickled FWJR size before and
        # after stripping the input files.
        info = BasicAlgos.getFileInfo(filename = path1)
        self.assertEqual(info['Size'], 7101)

        inputFiles = myReport.getAllInputFiles()
        self.assertEqual(len(inputFiles), 1)
        myReport.stripInputFiles()
        self.assertEqual(len(myReport.getAllInputFiles()), 0)

        myReport.save(path2)
        info = BasicAlgos.getFileInfo(filename = path2)
        self.assertEqual(info['Size'], 6210)
        return

    def testDuplicateStep(self):
        """
        _testDuplicateStep_

        If the same step is added twice, it should act as a replacement, and
        raise an appropriate message.

        FIX: method was previously misspelled "testDuplicatStep"; the
        docstring already used the correct name. The "test" prefix is
        preserved so unittest discovery is unaffected.
        """
        baseReport = Report("cmsRun1")
        baseReport.parse(self.xmlPath)

        modReport = Report("cmsRun1")
        modReport.parse(self.xmlPath)
        setattr(modReport.data.cmsRun1, 'testVar', 'test01')

        report = Report()
        report.setStep(stepName = 'cmsRun1', stepSection = baseReport.retrieveStep('cmsRun1'))
        report.setStep(stepName = 'cmsRun1', stepSection = modReport.retrieveStep('cmsRun1'))

        # The second setStep replaces the first rather than duplicating it.
        self.assertEqual(report.listSteps(), ['cmsRun1'])
        self.assertEqual(report.data.cmsRun1.testVar, 'test01')
        return

    def testDeleteOutputModule(self):
        """
        _testDeleteOutputModule_

        If asked delete an output module, if it doesn't exist then do
        nothing.
        """
        originalReport = Report("cmsRun1")
        originalReport.parse(self.xmlPath)

        self.assertTrue(originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
                        "Error: Report XML doesn't have the module for the test, invalid test")

        originalOutputModules = len(originalReport.retrieveStep("cmsRun1").outputModules)
        originalReport.deleteOutputModuleForStep("cmsRun1", "outputALCARECORECO")
        self.assertFalse(originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
                         "Error: The output module persists after deletion")
        self.assertEqual(len(originalReport.retrieveStep("cmsRun1").outputModules),
                         originalOutputModules - 1,
                         "Error: The number of output modules is incorrect after deletion")

    def testSkippedFiles(self):
        """
        _testSkippedFiles_

        Test that skipped files are translated from FWJR into report.
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        self.assertEqual(originalReport.getAllSkippedFiles(),
                         ['/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A611-E211-A851-0019B9F581C9.root'])

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        self.assertEqual(goodReport.getAllSkippedFiles(), [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        self.assertEqual(sorted(badReport.getAllSkippedFiles()),
                         ['/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A611-E211-A851-0019B9F581C9.root',
                          '/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A622-E211-A851-0019B9F581C.root'])

        return

    def testSkippedFilesJSON(self):
        """
        _testSkippedFilesJSON_

        Test that skipped files are translated properly into JSON.
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        originalJSON = originalReport.__to_json__(None)
        self.assertEqual(len(originalJSON['skippedFiles']), 1)

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['skippedFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['skippedFiles']), 2)

        return

    def testFallbackFiles(self):
        """
        _testFallback_

        Test that fallback files end up in the report.
        """
        # For negative control, check a good report with no fallback files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        self.assertEqual(goodReport.getAllFallbackFiles(), [])

        # Check a report where the file was a fallback
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        self.assertEqual(sorted(badReport.getAllFallbackFiles()),
                         ['/store/data/Run2012D/SingleElectron/AOD/PromptReco-v1/000/207/279/D43A5B72-1831-E211-895D-001D09F24763.root'])

        return

    def testFallbackFilesJSON(self):
        """
        _testFallbackFilesJSON_

        Test that fallback attempt files are translated properly into JSON.
        """
        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['fallbackFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['fallbackFiles']), 1)

        return

    def testOutputCheck(self):
        """
        _testOutputCheck_

        Check that we can identify bad reports with no output files.
        """
        # 60450 is the exit code for a step that produced no output files.
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badReport.checkForOutputFiles("cmsRun1")
        self.assertFalse(badReport.stepSuccessful(stepName = "cmsRun1"))
        self.assertEqual(badReport.getExitCode(), 60450)
        return
class DashboardReporterTest(unittest.TestCase):
    """
    _DashboardReporterTest_

    Test class for dashboardReporter
    """

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        WMBS tables.  Also, create some dummy locations.
        """
        myThread = threading.currentThread()

        self.sites = ['T2_US_Florida', 'T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN']

        self.testInit = TestInit(__file__)
        self.testInit.setLogging(logLevel = logging.DEBUG)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ['WMCore.WMBS',
                                                 'WMCore.ResourceControl',
                                                 'WMCore.Agent.Database'],
                                useDefault = False)
        self.testInit.setupCouch("dashboardreporter_t/jobs", "JobDump")
        self.testInit.setupCouch("dashboardreporter_t/fwjrs", "FWJRDump")

        # Register each site in ResourceControl so jobs can be placed there.
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName = site, seName = site,
                                       ceName = site)
            resourceControl.insertThreshold(siteName = site, taskType = 'Processing', \
                                            maxSlots = 10000)

        self.testDir = self.testInit.generateWorkDir()
        # Alert receiver is created lazily by tests that need it (testC).
        self.alertsReceiver = None
        return

    def tearDown(self):
        """
        _tearDown_

        Rip things down.
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        if self.alertsReceiver:
            self.alertsReceiver.shutdown()
        return

    def getConfig(self):
        """
        _getConfig_

        Creates a common config.
        """
        config = Configuration()

        #First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())

        config.section_("Agent")
        config.Agent.componentName = "DashboardReporter"
        config.Agent.useHeartbeat = False

        config.section_("DashboardReporter")
        config.DashboardReporter.dashboardHost = "cmssrv52.fnal.gov"
        config.DashboardReporter.dashboardPort = 8884

        #JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL',
                                                    'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = "dashboardreporter_t"

        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"

        return config

    def createWorkload(self, workloadName = 'Test', emulator = True):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        """
        workload = testWorkload("Tier1ReReco")
        rereco = workload.getTask("ReReco")

        # Add RequestManager stuff
        workload.data.request.section_('schema')
        workload.data.request.schema.Requestor = 'nobody'
        workload.data.request.schema.Group = 'testers'

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        workload.save(workloadName)

        return workload

    def createTestJobGroup(self, nJobs = 10, retry_count = 0, workloadPath = 'test'):
        """
        Creates a group of several jobs

        :param nJobs: number of jobs to create
        :param retry_count: retry_count stamped onto each job
        :param workloadPath: spec path recorded on the Workflow
        :returns: the committed WMBS JobGroup with acquired input files
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec = workloadPath, owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()
        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # One parent file plus two children that each job will process.
        testFile0 = File(lfn = "/this/is/a/parent", size = 1024, events = 10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('malpaquet')

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn = "/this/is/a/parent")
        testFileB.addParent(lfn = "/this/is/a/parent")

        for i in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['group'] = 'BadGuys'
            testJob['user'] = '******'
            testJob['taskType'] = 'Merge'
            #testJob['fwjr'] = myReport
            testJobGroup.add(testJob)
            testJob.create(group = testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files = [testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup

    def testA_testSubmit(self):
        """
        _testSubmit_

        Test whether we pick up submitted jobs
        """
        #workload = self.createWorkload()
        jobGroup = self.createTestJobGroup()
        config = self.getConfig()

        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        changer = ChangeState(config)
        for job in jobGroup.jobs:
            job['fwjr'] = myReport
        changer.propagate(jobGroup.jobs, "complete", "executing")
        changer.propagate(jobGroup.jobs, "success", "complete")

        dashboardReporter = DashboardReporterPoller(config = config)
        dashboardReporter.algorithm()

        # What the hell am I supposed to check?
        changer.propagate(jobGroup.jobs, 'jobfailed', 'executing')

        dashboardReporter.algorithm()
        return

    def testB_CheckExecutingJobsAndProfile(self):
        """
        _CheckExecutingJobsAndProfile_

        Pull up some executing jobs and profile them.
        """
        # NOTE(review): the early return below deliberately disables this
        # profiling test; the code after it is intentionally unreachable and
        # kept for manual profiling runs.
        return
        jobGroup = self.createTestJobGroup()
        config = self.getConfig()

        changer = ChangeState(config)
        changer.propagate(jobGroup.jobs, "executing", "created")

        dashboardReporter = DashboardReporterPoller(config = config)
        import cProfile, pstats
        cProfile.runctx("dashboardReporter.algorithm()", globals(), locals(),
                        filename = "testStats.stat")
        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)
        #dashboardReporter.algorithm()
        return

    def testC_DashboardReporterPollerAlertSending_algorithm(self):
        """
        Cause exception (alert-worthy situation) in the algorithm() method.
        """
        myThread = threading.currentThread()
        config = self.getConfig()

        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        # emulate exception behaviour
        def raiseException():
            raise Exception("My test exception.")

        dashboardReporter = DashboardReporterPoller(config = config)
        # Monkey-patch the poller so algorithm() hits the exception path.
        dashboardReporter.pollCouch = raiseException
        self.assertRaises(Exception, dashboardReporter.algorithm)

        # wait for the generated alert to arrive
        while len(handler.queue) == 0:
            time.sleep(0.3)
            print "%s waiting for alert to arrive ..." % inspect.stack()[0][3]

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), 1)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], dashboardReporter.__class__.__name__)
        self.assertEqual(alert["Component"], "DashboardReporter")
        return
class JobSubmitterTest(EmulatedUnitTestCase):
    """
    _JobSubmitterTest_

    Integration tests for the JobSubmitterPoller: verifies that created jobs
    are submitted, that site/task thresholds are honored, that Merge jobs are
    prioritized over Processing, and that site states (Normal/Draining/Down/
    Aborted) are respected.  Requires WMBS/BossAir schema and CouchDB.
    """

    def setUp(self):
        """
        _setUp_

        Standard setup: Now with 100% more couch.
        Creates the WMBS/BossAir/ResourceControl schema, the Couch databases,
        the DAO factories, a work directory and a heartbeat registration.
        """
        super(JobSubmitterTest, self).setUp()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl",
                                               "WMCore.Agent.Database"])
        self.testInit.setupCouch("jobsubmitter_t/jobs", "JobDump")
        self.testInit.setupCouch("jobsubmitter_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("wmagent_summary_t", "WMStats")

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.baDaoFactory = DAOFactory(package="WMCore.BossAir",
                                       logger=myThread.logger,
                                       dbinterface=myThread.dbi)

        self.testDir = self.testInit.generateWorkDir()

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        self.configFile = EmulatorSetup.setupWMAgentConfig()
        config = self.getConfig()
        myThread.logdbClient = MockLogDB(config.General.central_logdb_url,
                                         config.Agent.hostName, logger=None)
        return

    def tearDown(self):
        """
        _tearDown_

        Standard tearDown: drop the database schema, the work directory,
        the Couch databases and the emulator config file.
        """
        myThread = threading.currentThread()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        myThread.logdbClient = None
        return

    def setResourceThresholds(self, site, **options):
        """
        _setResourceThresholds_

        Utility to set resource thresholds.

        :param site: site name to insert into ResourceControl
        :param options: optional keys 'state', 'pendingSlots', 'runningSlots',
            'tasks' (list of task types) and one dict per task type with its
            own 'pendingSlots'/'runningSlots'.  When empty, a small default
            Normal-state configuration is used.
        """
        if not options:
            options = {'state': 'Normal',
                       'runningSlots': 10,
                       'pendingSlots': 5,
                       'tasks': ['Processing', 'Merge'],
                       'Processing': {'pendingSlots': 5,
                                      'runningSlots': 10},
                       'Merge': {'pendingSlots': 2,
                                 'runningSlots': 5}}

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName=site,
                                   pnn='se.%s' % (site),
                                   ceName=site,
                                   plugin="MockPlugin",
                                   pendingSlots=options['pendingSlots'],
                                   runningSlots=options['runningSlots'],
                                   cmsName=site)
        for task in options['tasks']:
            resourceControl.insertThreshold(siteName=site, taskType=task,
                                            maxSlots=options[task]['runningSlots'],
                                            pendingSlots=options[task]['pendingSlots'])
        if options.get('state'):
            resourceControl.changeSiteState(site, options.get('state'))

        return

    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions: one workflow, then
        nSubs subscriptions each carrying nJobs jobs built by makeNJobs.

        :returns: list of committed JobGroup objects
        """
        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production")
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList

    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, site, sub):
        """
        _makeNJobs_

        Make and return a WMBS Job and File.
        This handles all those damn add-ons: per-job cache directory,
        pickled job file, location/possiblePSN assignment.

        :param site: a single site name or a list of site names; a list means
            the job may run at any of them (first entry becomes 'location')
        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            # site = self.sites[0]
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024, events=10)
            fileset.addFile(testFile)

        fileset.commit()

        location = None
        if isinstance(site, list):
            if len(site) > 0:
                location = site[0]
        else:
            location = site

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = location
            testJob["possiblePSN"] = set(site) if isinstance(site, list) else set([site])
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['priority'] = 101
            testJob['numberOfCores'] = 1
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # 'wb' + context manager: pickle output is binary and the handle
            # must be closed even if pickling raises (was bare open/close)
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)

        return testJob, testFile

    def getConfig(self):
        """
        _getConfig_

        Gets a basic config from default location, then fills in the Agent,
        General, CoreDatabase, BossAir/MockPlugin, JobSubmitter,
        JobStateMachine and TaskArchiver sections needed by the poller.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.component_("Agent")
        config.Agent.WMSpecDirectory = self.testDir
        config.Agent.agentName = 'testAgent'
        config.Agent.hostName = 'testAgent'
        config.Agent.componentName = self.componentName
        config.Agent.useHeartbeat = False

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)
        config.General.central_logdb_url = "http://localhost/testlogdb"

        # Now the CoreDatabase information
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # BossAir and MockPlugin configuration
        config.section_("BossAir")
        config.BossAir.pluginNames = ['MockPlugin']
        # Here Test the CondorPlugin instead of MockPlugin
        # config.BossAir.pluginNames = ['CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_("MockPlugin")
        # NOTE(review): fakeReport points at submit.sh, not a report file —
        # looks intentional for the mock, but confirm against MockPlugin
        config.BossAir.MockPlugin.fakeReport = os.path.join(getTestBase(),
                                                            'WMComponent_t/JobSubmitter_t',
                                                            "submit.sh")

        # JobSubmitter configuration
        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'DEBUG'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.submitScript = os.path.join(getTestBase(),
                                                        'WMComponent_t/JobSubmitter_t',
                                                        'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(self.testDir, 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL')
        config.JobStateMachine.couchDBName = "jobsubmitter_t"
        config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t'

        # TaskArchive setup (JobSubmitter needs this)
        config.component_("TaskArchiver")
        config.TaskArchiver.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2"

        # Needed, because this is a test
        try:
            os.makedirs(config.JobSubmitter.componentDir)
        except OSError:
            # directory may already exist from an earlier getConfig() call;
            # anything else (permissions, etc.) still surfaces via OSError
            # subclasses during the actual submit
            pass

        return config

    def createTestWorkload(self):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        Side effect: records the pickled spec path in self.workloadSpecPath.
        """
        workload = testWorkload()

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()
        self.workloadSpecPath = os.path.join(self.testDir, 'workloadTest',
                                             "TestWorkload/WMSandbox/WMWorkload.pkl")

        return workload

    def testA_BasicTest(self):
        """
        Use the MockPlugin to create a simple test.
        Check to see that all the jobs were "submitted",
        don't care about thresholds.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 20
        site = "T2_US_UCSD"

        self.setResourceThresholds(site, pendingSlots=50, runningSlots=100,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 50,
                                               'runningSlots': 100},
                                   Merge={'pendingSlots': 50,
                                          'runningSlots': 100})

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Check that jobs are in the right state
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [['T2_US_UCSD']])

        # Run another cycle, it shouldn't submit anything. There isn't anything to submit
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        nSubs = 1
        nJobs = 10

        # Submit another 10 jobs
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            taskType="Merge")
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Check that the jobs are available for submission and run another cycle
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), nSubs * nJobs)
        jobSubmitter.algorithm()

        # Check that the last 10 jobs were submitted as well.
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), nSubs * nJobs)

        return

    def testB_thresholdTest(self):
        """
        _testB_thresholdTest_

        Check that the threshold management is working,
        this requires checks on pending/running jobs globally
        at a site and per task/site.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 5
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(site, pendingSlots=50, runningSlots=220,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 45,
                                               'runningSlots': 200},
                                   Merge={'pendingSlots': 10,
                                          'runningSlots': 20,
                                          'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Do pre-submit check
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        jobSubmitter.algorithm()

        # Check that jobs are in the right state,
        # here we are limited by the pending threshold for the Processing task (45)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        for jobId in result:
            loc = getLocationAction.execute(jobid=jobId)
            self.assertEqual(loc, [['T1_US_FNAL']])

        # Run another cycle, it shouldn't submit anything. Jobs are still in pending
        jobSubmitter.algorithm()
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now put 10 Merge jobs, only 5 can be submitted,
        # there we hit the global pending threshold for the site
        nSubs = 1
        nJobs = 10
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            taskType='Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 5)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 45)

        # Now let's test running thresholds
        # The scenario will be setup as follows: Move all current jobs as running
        # Create 300 Processing jobs and 300 merge jobs
        # Run 5 polling cycles, moving all pending jobs to running in between
        # Result is, merge is left at 30 running 0 pending and processing
        # is left at 240 running 0 pending
        # Processing has 110 jobs in queue and Merge 280
        # This tests all threshold dynamics including the prioritization
        # of merge over processing
        nSubs = 1
        nJobs = 300

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        jobGroupList.extend(self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                                 task=workload.getTask("ReReco"),
                                                 workloadSpec=self.workloadSpecPath,
                                                 site=site,
                                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for _ in range(5):
            result = getJobsAction.execute(state='Executing')
            binds = []
            for jobId in result:
                binds.append({'id': jobId, 'retry_count': 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')
            jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(result), 240)
        result = getJobsAction.execute(state='Created', jobType='Processing')
        self.assertEqual(len(result), 110)
        result = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(result), 30)
        result = getJobsAction.execute(state='Created', jobType='Merge')
        self.assertEqual(len(result), 280)

        return

    def testC_prioritization(self):
        """
        _testC_prioritization_

        Check that jobs are prioritized by job type and by oldest workflow.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 1
        nJobs = 10
        site = "T1_US_FNAL"

        self.setResourceThresholds(site, pendingSlots=10, runningSlots=10000,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 50,
                                               'runningSlots': 10000},
                                   Merge={'pendingSlots': 10,
                                          'runningSlots': 10000,
                                          'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        jobSubmitter = JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='OldestWorkflow')
        jobGroupList.extend(self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                                 task=workload.getTask("ReReco"),
                                                 workloadSpec=self.workloadSpecPath,
                                                 site=site,
                                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        # Merge goes first
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)
        result = getJobsAction.execute(state='Executing', jobType="Merge")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state='Created', jobType="Processing")
        self.assertEqual(len(result), 10)
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Create a newer workflow processing, and after some new jobs for an old workflow
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='OldestWorkflow')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site,
                                            name='NewestWorkflow')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Move pending jobs to running
        getRunJobID = self.baDaoFactory(classname="LoadByWMBSID")
        setRunJobStatus = self.baDaoFactory(classname="SetStatus")

        for idx in range(2):
            result = getJobsAction.execute(state='Executing')
            binds = []
            for jobId in result:
                binds.append({'id': jobId, 'retry_count': 0})
            runJobIds = getRunJobID.execute(binds)
            setRunJobStatus.execute([x['id'] for x in runJobIds], 'Running')

            # Run again on created workflows
            jobSubmitter.algorithm()

            result = getJobsAction.execute(state='Created', jobType="Merge")
            self.assertEqual(len(result), 0)
            result = getJobsAction.execute(state='Executing', jobType="Merge")
            self.assertEqual(len(result), 10)
            result = getJobsAction.execute(state='Created', jobType="Processing")
            self.assertEqual(len(result), 30 - (idx + 1) * 10)
            result = getJobsAction.execute(state='Executing', jobType="Processing")
            self.assertEqual(len(result), (idx + 1) * 10)

            # Check that older workflow goes first even with newer jobs
            getWorkflowAction = self.daoFactory(classname="Jobs.GetWorkflowTask")
            workflows = getWorkflowAction.execute(result)
            for workflow in workflows:
                self.assertEqual(workflow['name'], 'OldestWorkflow')

        return

    def testD_SubmitFailed(self):
        """
        _testD_SubmitFailed_

        Check if jobs without a possible site to run at go to SubmitFailed.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 2
        nJobs = 10

        # site=[] means the jobs have no possible location
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            site=[],
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        jobSubmitter.algorithm()

        # Jobs should go to submit failed
        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='SubmitFailed', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        return

    def testE_SiteModesTest(self):
        """
        _testE_SiteModesTest_

        Test the behavior of the submitter in response to the different
        states of the sites.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)
        nSubs = 1
        nJobs = 20

        sites = ['T2_US_Florida', 'T2_TW_Taiwan', 'T3_CO_Uniandes', 'T1_US_FNAL']
        for site in sites:
            self.setResourceThresholds(site, pendingSlots=10, runningSlots=-1,
                                       tasks=['Processing', 'Merge'],
                                       Processing={'pendingSlots': 10,
                                                   'runningSlots': -1},
                                       Merge={'pendingSlots': 10,
                                              'runningSlots': -1,
                                              'priority': 5})

        myResourceControl = ResourceControl(config)
        myResourceControl.changeSiteState('T2_US_Florida', 'Draining')

        # First test that we prefer Normal over drain, and T1 over T2/T3
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            site=list(sites),
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter = JobSubmitterPoller(config=config)
        # Actually run it
        jobSubmitter.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # All jobs should be at either FNAL, Taiwan or Uniandes. It's a random selection
        # Check assigned locations
        getLocationAction = self.daoFactory(classname="Jobs.GetLocation")
        locationDict = getLocationAction.execute([{'jobid': x} for x in result])
        for entry in locationDict:
            loc = entry['site_name']
            self.assertNotEqual(loc, 'T2_US_Florida')

        # Now set everything to down, check we don't submit anything
        for site in sites:
            myResourceControl.changeSiteState(site, 'Down')
        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            site=list(sites),
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath)
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')
        jobSubmitter.algorithm()

        # Nothing is submitted despite the empty slots at Uniandes and Florida
        result = getJobsAction.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nSubs * nJobs)

        # Now set everything to Drain and create Merge jobs. Those should be submitted
        for site in sites:
            myResourceControl.changeSiteState(site, 'Draining')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs=nSubsMerge, nJobs=nJobsMerge,
                                            site=list(sites),
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            taskType='Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='Executing', jobType='Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)

        # Now set everything to Aborted, and create Merge jobs. Those should fail
        # since the can only run at one place
        for site in sites:
            myResourceControl.changeSiteState(site, 'Aborted')

        nSubsMerge = 1
        nJobsMerge = 5
        jobGroupList = self.createJobGroups(nSubs=nSubsMerge, nJobs=nJobsMerge,
                                            site=list(sites),
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            taskType='Merge')
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        jobSubmitter.algorithm()

        result = getJobsAction.execute(state='SubmitFailed', jobType='Merge')
        self.assertEqual(len(result), nSubsMerge * nJobsMerge)
        result = getJobsAction.execute(state='Executing', jobType='Processing')
        self.assertEqual(len(result), nSubs * nJobs)

        return

    @attr('integration')
    def testF_PollerProfileTest(self):
        """
        _testF_PollerProfileTest_

        Submit a lot of jobs and test how long it takes for them to
        actually be submitted.  Profiles one algorithm() cycle with cProfile.
        """
        workload = self.createTestWorkload()
        config = self.getConfig()
        changeState = ChangeState(config)

        nSubs = 100
        nJobs = 100
        site = "T1_US_FNAL"

        self.setResourceThresholds(site, pendingSlots=20000, runningSlots=-1,
                                   tasks=['Processing', 'Merge'],
                                   Processing={'pendingSlots': 10000,
                                               'runningSlots': -1},
                                   Merge={'pendingSlots': 10000,
                                          'runningSlots': -1,
                                          'priority': 5})

        # Always initialize the submitter after setting the sites, flaky!
        JobSubmitterPoller(config=config)

        jobGroupList = self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                            task=workload.getTask("ReReco"),
                                            workloadSpec=self.workloadSpecPath,
                                            site=site)
        jobGroupList.extend(self.createJobGroups(nSubs=nSubs, nJobs=nJobs,
                                                 task=workload.getTask("ReReco"),
                                                 workloadSpec=self.workloadSpecPath,
                                                 site=site,
                                                 taskType='Merge'))
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

        # Actually run it
        startTime = time.time()
        cProfile.runctx("JobSubmitterPoller(config=config).algorithm()",
                        globals(), locals(), filename="testStats.stat")
        stopTime = time.time()

        print("Job took %f seconds to complete" % (stopTime - startTime))

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
class TaskArchiverTest(EmulatedUnitTestCase): """ TestCase for TestTaskArchiver module """ _setup_done = False _teardown = False _maxMessage = 10 OWNERDN = os.environ[ 'OWNERDN'] if 'OWNERDN' in os.environ else "Generic/OWNERDN" def setUp(self): """ setup for test. """ super(TaskArchiverTest, self).setUp() myThread = threading.currentThread() self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection(destroyAllDatabase=True) self.testInit.setSchema( customModules=["WMCore.WMBS", "WMComponent.DBS3Buffer"], useDefault=False) self.databaseName = "taskarchiver_t_0" self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary") self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump") self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump") self.testInit.setupCouch("wmagent_summary_t", "WMStats") self.testInit.setupCouch("wmagent_summary_central_t", "WMStats") self.testInit.setupCouch("stat_summary_t", "SummaryStats") reqmgrdb = "reqmgrdb_t" self.testInit.setupCouch(reqmgrdb, "ReqMgr") reqDBURL = "%s/%s" % (self.testInit.couchUrl, reqmgrdb) self.requestWriter = RequestDBWriter(reqDBURL) self.requestWriter.defaultStale = {} self.daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.getJobs = self.daofactory(classname="Jobs.GetAllJobs") self.inject = self.daofactory( classname="Workflow.MarkInjectedWorkflows") self.testDir = self.testInit.generateWorkDir() os.makedirs(os.path.join(self.testDir, 'specDir')) self.nJobs = 10 self.campaignName = 'aCampaign' return def tearDown(self): """ Database deletion """ self.testInit.clearDatabase(modules=["WMCore.WMBS"]) self.testInit.delWorkDir() self.testInit.tearDownCouch() return def getConfig(self): """ _createConfig_ General config file """ config = self.testInit.getConfiguration() # 
self.testInit.generateWorkDir(config) config.section_("General") config.General.workDir = "." config.General.ReqMgr2ServiceURL = "https://cmsweb-dev.cern.ch/reqmgr2" config.section_("JobStateMachine") config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv52.fnal.gov:5984") config.JobStateMachine.couchDBName = self.databaseName config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t' config.JobStateMachine.summaryStatsDBName = 'stat_summary_t' config.component_("JobCreator") config.JobCreator.jobCacheDir = os.path.join(self.testDir, 'testDir') config.component_("TaskArchiver") config.TaskArchiver.componentDir = self.testDir config.TaskArchiver.WorkQueueParams = { 'CacheDir': config.JobCreator.jobCacheDir } config.TaskArchiver.pollInterval = 60 config.TaskArchiver.logLevel = 'INFO' config.TaskArchiver.timeOut = 0 config.TaskArchiver.histogramKeys = [ 'AvgEventTime', 'writeTotalMB', 'jobTime' ] config.TaskArchiver.histogramBins = 5 config.TaskArchiver.histogramLimit = 5 config.TaskArchiver.perfPrimaryDatasets = [ 'SingleMu', 'MuHad', 'MinimumBias' ] config.TaskArchiver.perfDashBoardMinLumi = 50 config.TaskArchiver.perfDashBoardMaxLumi = 9000 config.TaskArchiver.dqmUrl = 'https://cmsweb.cern.ch/dqm/dev/' config.TaskArchiver.dashBoardUrl = 'http://dashboard43.cern.ch/dashboard/request.py/putluminositydata' config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName config.TaskArchiver.localWMStatsURL = "%s/%s" % ( config.JobStateMachine.couchurl, config.JobStateMachine.jobSummaryDBName) config.TaskArchiver.workloadSummaryCouchURL = config.JobStateMachine.couchurl config.TaskArchiver.requireCouch = True config.component_("AnalyticsDataCollector") config.AnalyticsDataCollector.centralRequestDBURL = '%s/reqmgrdb_t' % config.JobStateMachine.couchurl config.AnalyticsDataCollector.RequestCouchApp = "ReqMgr" config.section_("ACDC") config.ACDC.couchurl = config.JobStateMachine.couchurl config.ACDC.database = 
config.JobStateMachine.couchDBName # Make the jobCacheDir os.mkdir(config.JobCreator.jobCacheDir) # addition for Alerts messaging framework, work (alerts) and control # channel addresses to which the component will be sending alerts # these are destination addresses where AlertProcessor:Receiver listens config.section_("Alert") config.Alert.address = "tcp://127.0.0.1:5557" config.Alert.controlAddr = "tcp://127.0.0.1:5559" config.section_("Agent") config.Agent.serverDN = "/we/bypass/myproxy/logon" return config def createWorkload(self, workloadName): """ _createTestWorkload_ Creates a test workload for us to run on, hold the basic necessities. """ workload = testWorkload(workloadName) taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest')) taskMaker.skipSubscription = True taskMaker.processWorkload() workload.setCampaign(self.campaignName) workload.save(workloadName) return workload def createTestJobGroup(self, config, name="TestWorkthrough", filesetName="TestFileset", specLocation="spec.xml", error=False, task="/TestWorkload/ReReco", jobType="Processing"): """ Creates a group of several jobs """ testWorkflow = Workflow(spec=specLocation, owner=self.OWNERDN, name=name, task=task, owner_vogroup="", owner_vorole="") testWorkflow.create() self.inject.execute(names=[name], injected=True) testWMBSFileset = Fileset(name=filesetName) testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12314])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testWMBSFileset.markOpen(0) outputWMBSFileset = Fileset(name='%sOutput' % filesetName) outputWMBSFileset.create() testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10) testFileC.addRun(Run(10, 
                          *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)
        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=jobType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create nJobs jobs, all sharing the two input files and a
        # run/lumi mask over run 10.
        for i in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        # Pick canned framework job reports: a failing one for the error
        # case, otherwise a successful merge report.  report2 is always the
        # LogCollect report.
        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename=path1)
        report2.load(filename=path2)

        # Walk the jobs through the full state machine: run once, fail,
        # cool off, retry, fail again, then exhaust and clean out.
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        # First half of the jobs get report1, second half report2.
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup

    def createGiantJobSet(self, name, config, nSubs=10, nJobs=10, nFiles=1,
                          spec="spec.xml"):
        """
        Creates a massive set of jobs

        Builds nSubs subscriptions with nJobs jobs each (one input file per
        job and nFiles output files per subscription), drives every job to
        the 'cleanout' state and returns the flat list of all created jobs.
        Used by the profiling/timing tests below.
        """
        jobList = []

        for i in range(0, nSubs):
            # Make a bunch of subscriptions
            localName = '%s-%i' % (name, i)
            testWorkflow = Workflow(spec=spec,
                                    owner=self.OWNERDN,
                                    name=localName,
                                    task="Test",
                                    owner_vogroup="",
                                    owner_vorole="")
            testWorkflow.create()

            testWMBSFileset = Fileset(name=localName)
            testWMBSFileset.create()

            testSubscription = Subscription(fileset=testWMBSFileset,
                                            workflow=testWorkflow)
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            filesToComplete = []

            for j in range(0, nJobs):
                # Create jobs for each subscription
                testFileA = File(lfn="%s-%i-lfnA" % (localName, j),
                                 size=1024,
                                 events=10)
                testFileA.addRun(
                    Run(10,
                        *[11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
                          31, 32, 33, 34, 35, 36, 37, 38, 39, 40]))
                testFileA.setLocation('malpaquet')
                testFileA.create()

                testWMBSFileset.addFile(testFileA)
                testWMBSFileset.commit()

                filesToComplete.append(testFileA)

                testJob = Job(name='%s-%i' % (localName, j))
                testJob.addFile(testFileA)
                testJob['retry_count'] = 1
                testJob['retry_max'] = 10
                testJobGroup.add(testJob)
                jobList.append(testJob)

                for k in range(0, nFiles):
                    # Create output files
                    testFile = File(lfn="%s-%i-output" % (localName, k),
                                    size=1024,
                                    events=10)
                    testFile.addRun(Run(10, *[12312]))
                    testFile.setLocation('malpaquet')
                    testFile.create()

                    testJobGroup.output.addFile(testFile)

                testJobGroup.output.commit()

            testJobGroup.commit()

            # Drive every job straight through to a successful cleanout.
            changer = ChangeState(config)

            changer.propagate(testJobGroup.jobs, 'created', 'new')
            changer.propagate(testJobGroup.jobs, 'executing', 'created')
            changer.propagate(testJobGroup.jobs, 'complete', 'executing')
            changer.propagate(testJobGroup.jobs, 'success', 'complete')
            changer.propagate(testJobGroup.jobs, 'cleanout', 'success')

            testWMBSFileset.markOpen(0)

            testSubscription.completeFiles(filesToComplete)

        return jobList

    def getPerformanceFromDQM(self, dqmUrl, dataset, run):
        """
        Stand-in for the DQM GUI lookup used by the component.

        Asserts that the jsonfairy URL is assembled as expected for run
        207214, then returns a canned JSON response read from disk instead
        of contacting DQM.
        """
        # Make function to fetch this from DQM. Returning Null or False if it fails
        getUrl = "%sjsonfairy/archive/%s%s/DQM/TimerService/event_byluminosity" % (
            dqmUrl, run, dataset)
        # Assert if the URL is assembled as expected
        if run == 207214:
            self.assertEqual(
                'https://cmsweb.cern.ch/dqm/dev/jsonfairy/archive/207214/MinimumBias/Commissioning10-v4/DQM/DQM/TimerService/event_byluminosity',
                getUrl)

        # let's suppose it works..
        testResponseFile = open(
            os.path.join(getTestBase(),
                         'WMComponent_t/TaskArchiver_t/DQMGUIResponse.json'),
            'r')
        response = testResponseFile.read()
        testResponseFile.close()
        responseJSON = json.loads(response)
        return responseJSON

    def filterInterestingPerfPoints(self, responseJSON, minLumi, maxLumi):
        """
        Extract {instantaneous luminosity: time per event} pairs from a DQM
        jsonfairy histogram response, keeping only non-zero bins whose
        luminosity falls strictly between minLumi and maxLumi.
        """
        worthPoints = {}
        points = responseJSON["hist"]["bins"]["content"]
        for i in range(responseJSON["hist"]["xaxis"]["first"]["id"],
                       responseJSON["hist"]["xaxis"]["last"]["id"]):
            # is the point worth it? if yes add to interesting points dictionary.
            # 1 - non 0
            # 2 - between minimum and maximum expected luminosity
            # FIXME : 3 - population in dashboard for the bin interval < 100
            # Those should come from the config :
            if points[i] == 0:
                continue
            # NOTE(review): binSize is loop-invariant and could be hoisted
            # out of the loop.
            binSize = responseJSON["hist"]["xaxis"]["last"][
                "value"] / responseJSON["hist"]["xaxis"]["last"]["id"]
            # Fetching the important values
            instLuminosity = i * binSize
            timePerEvent = points[i]

            if instLuminosity > minLumi and instLuminosity < maxLumi:
                worthPoints[instLuminosity] = timePerEvent
        return worthPoints

    def publishPerformanceDashBoard(self, dashBoardUrl, PD, release,
                                    worthPoints):
        """
        Stand-in for the DashBoard publication step.

        Builds the JSON payload from the filtered performance points and
        asserts it matches the canned payload on disk; always returns True.
        """
        dashboardPayload = []
        for instLuminosity in worthPoints:
            timePerEvent = int(worthPoints[instLuminosity])
            dashboardPayload.append({"primaryDataset": PD,
                                     "release": release,
                                     "integratedLuminosity": instLuminosity,
                                     "timePerEvent": timePerEvent})

        data = "{\"data\":%s}" % str(dashboardPayload).replace("\'", "\"")

        # let's suppose it works..
        testDashBoardPayloadFile = open(
            os.path.join(getTestBase(),
                         'WMComponent_t/TaskArchiver_t/DashBoardPayload.json'),
            'r')
        testDashBoardPayload = testDashBoardPayloadFile.read()
        testDashBoardPayloadFile.close()

        self.assertEqual(data, testDashBoardPayload)

        return True

    def populateWorkflowWithCompleteStatus(self, name="TestWorkload"):
        """
        Insert a generic request into the request database and mark it
        'completed', so the pollers below treat the workflow as finished.
        """
        schema = generate_reqmgr_schema(1)
        schema[0]["RequestName"] = name

        self.requestWriter.insertGenericRequest(schema[0])
        result = self.requestWriter.updateRequestStatus(name, "completed")
        return result

    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of
        closeouts
        """
        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=False)

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        cachePath2 = os.path.join(config.JobCreator.jobCacheDir,
                                  "TestWorkload", "LogCollect")
        os.makedirs(cachePath2)
        self.assertTrue(os.path.exists(cachePath2))

        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 2)

        workflowName = "TestWorkload"
        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobs = jobdb.loadView("JobDump",
                              "jobsByWorkflowName",
                              options={"startkey": [workflowName],
                                       "endkey": [workflowName, {}]})['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={"startkey": [workflowName],
                                 "endkey": [workflowName, {}]})['rows']

        self.assertEqual(len(jobs), 2 * self.nJobs)

        from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase
        create = CreateWMBSBase()
        tables = []
        # NOTE(review): 'tables' is built but never used afterwards.
        for x in create.requiredTables:
            tables.append(x[2:])

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        # After archiving, every WMBS table for this workflow must be empty.
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_fileset")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        # Make sure we deleted the directory
        self.assertFalse(os.path.exists(cachePath))
        self.assertFalse(
            os.path.exists(
                os.path.join(self.testDir, 'workloadTest/TestWorkload')))

        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        workloadSummary = workdatabase.document(id="TestWorkload")

        # Check ACDC
        self.assertEqual(workloadSummary['ACDCServer'],
                         sanitizeURL(config.ACDC.couchurl)['url'])

        # Check the output
        self.assertEqual(workloadSummary['output'].keys(),
                         ['/Electron/MorePenguins-v0/RECO'])
        self.assertEqual(
            sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO']
                   ['tasks']),
            ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect'])

        # Check performance
        # Check histograms
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['average'],
            0.89405199999999996,
            places=2)
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['histogram'][0]['nEvents'], 10)

        # Check standard performance
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['average'],
            17.786300000000001,
            places=2)
        self.assertAlmostEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['TotalJobCPU']['stdDev'],
            0.0,
            places=2)

        # Check worstOffenders
        self.assertEqual(
            workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1']
            ['AvgEventTime']['worstOffenders'],
            [{'logCollect': None, 'log': None, 'value': '0.894052',
              'jobID': 1},
             {'logCollect': None, 'log': None, 'value': '0.894052',
              'jobID': 1},
             {'logCollect': None, 'log': None, 'value': '0.894052',
              'jobID': 2}])

        # Check retryData
        self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'],
                         {'1': 10})
        logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar'
        self.assertEqual(workloadSummary['logArchives'],
                         {'/TestWorkload/ReReco/LogCollect':
                          [logCollectPFN for _ in range(10)]})

        # LogCollect task is made out of identical FWJRs
        # assert that it is identical
        for x in workloadSummary['performance'][
                '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys():
            if x in config.TaskArchiver.histogramKeys:
                continue
            for y in ['average', 'stdDev']:
                self.assertAlmostEqual(
                    workloadSummary['performance']
                    ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y],
                    workloadSummary['performance']['/TestWorkload/ReReco']
                    ['cmsRun1'][x][y],
                    places=2)

        return

    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """
        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)

        # Create second workload
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb = CouchServer(config.JobStateMachine.couchurl)
        jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobdb.loadView("JobDump",
                       "jobsByWorkflowName",
                       options={"startkey": [workload.name()],
                                "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump",
                        "fwjrsByWorkflowName",
                        options={"startkey": [workload.name()],
                                 "endkey": [workload.name(), {}]})['rows']

        self.populateWorkflowWithCompleteStatus()
        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()
        cleanCouch.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id=workload.name())

        self.assertEqual(
            workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'],
            500)
        self.assertTrue('99999' in workloadSummary['errors']
                        ['/TestWorkload/ReReco']['cmsRun1'])

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco'][
            'cmsRun1']['99999']['runs']
        self.assertEqual(
            failedRunInfo, {'10': [[12312, 12312]]},
            "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['average']['Failed Jobs'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(
            workloadSummary['histograms']['workflowLevel']['failuresBySite']
            ['stdDev']['Failed Jobs'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(
            workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']
            ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)

        return

    @attr("integration")
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        import cProfile
        import pstats

        name = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()

        # Profile one polling cycle and dump the stats to disk.
        cProfile.runctx("cleanCouch.algorithm()", globals(), locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return

    @attr("integration")
    def testD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        # Archiving must have drained WMBS completely.
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData(
            "SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds", (stopTime - startTime))

    def testDQMRecoPerformanceToDashBoard(self):
        """
        Exercise the DQM-performance-to-DashBoard path end to end, using
        faked dbsbuffer rows and the canned DQM/DashBoard fixtures above.
        """

        myThread = threading.currentThread()

        listRunsWorkflow = self.dbsDaoFactory(classname="ListRunsWorkflow")

        # Didn't like to have done that, but the test doesn't provide all info I need in the system, so faking it:
        myThread.dbi.processData(
            """insert into dbsbuffer_workflow(id, name) values (1, 'TestWorkload')""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (1, '/store/t/e/s/t.test', 1, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file (id, lfn, dataset_algo, workflow) values (2, '/store/t/e/s/t.test2', 1, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207214, 100, 1)""",
            transaction=False)
        myThread.dbi.processData(
            """insert into dbsbuffer_file_runlumi_map (run, lumi, filename) values (207215, 200, 2)""",
            transaction=False)

        config = self.getConfig()
        dqmUrl = getattr(config.TaskArchiver, "dqmUrl")
        perfDashBoardMinLumi = getattr(config.TaskArchiver,
                                       "perfDashBoardMinLumi")
        perfDashBoardMaxLumi = getattr(config.TaskArchiver,
                                       "perfDashBoardMaxLumi")
        dashBoardUrl = getattr(config.TaskArchiver, "dashBoardUrl")

        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(config=config,
                                               name=workload.name(),
                                               specLocation=workloadPath,
                                               error=True)
        testJobGroup2 = self.createTestJobGroup(
            config=config,
            name=workload.name(),
            filesetName="TestFileset_2",
            specLocation=workloadPath,
            task="/TestWorkload/ReReco/LogCollect",
            jobType="LogCollect")

        # Adding request type as ReReco, real ReqMgr requests have it
        workload.data.request.section_("schema")
        workload.data.request.schema.RequestType = "ReReco"
        workload.data.request.schema.CMSSWVersion = 'test_compops_CMSSW_5_3_6_patch1'
        workload.getTask('ReReco').addInputDataset(name='/a/b/c',
                                                   primary='a',
                                                   processed='b',
                                                   tier='c')

        interestingPDs = getattr(config.TaskArchiver, "perfPrimaryDatasets")
        interestingDatasets = []
        # Are the datasets from this request interesting? Do they have DQM
        # output? One might ask afterwards if they have harvest
        for dataset in workload.listOutputDatasets():
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            if PD in interestingPDs and dataTier == "DQM":
                interestingDatasets.append(dataset)
        # We should have found 1 interesting dataset
        self.assertAlmostEqual(len(interestingDatasets), 1)
        if len(interestingDatasets) == 0:
            return
        # Request will be only interesting for performance if it's a ReReco
        # or PromptReco
        (isReReco, isPromptReco) = (False, False)
        if getattr(workload.data.request.schema, "RequestType",
                   None) == 'ReReco':
            isReReco = True
        # Yes, few people like magic strings, but have a look at :
        # https://github.com/dmwm/T0/blob/master/src/python/T0/RunConfig/RunConfigAPI.py#L718
        # Might be safe enough
        # FIXME: in TaskArchiver, add a test to make sure that the dataset makes sense (procDataset ~= /a/ERA-PromptReco-vVERSON/DQM)
        if re.search('PromptReco', workload.name()):
            isPromptReco = True
        if not (isReReco or isPromptReco):
            return

        self.assertTrue(isReReco)
        self.assertFalse(isPromptReco)

        # We are not interested if it's not a PromptReco or a ReReco
        # NOTE(review): this guard duplicates the identical check above and
        # is therefore unreachable as written.
        if not (isReReco or isPromptReco):
            return
        if isReReco:
            release = getattr(workload.data.request.schema, "CMSSWVersion")
            if not release:
                logging.info("no release for %s, bailing out",
                             workload.name())
        else:
            release = getattr(
                workload.tasks.Reco.steps.cmsRun1.application.setup,
                "cmsswVersion")
            if not release:
                logging.info("no release for %s, bailing out",
                             workload.name())

        self.assertEqual(release, "test_compops_CMSSW_5_3_6_patch1")

        # If all is true, get the run numbers processed by this worklfow
        runList = listRunsWorkflow.execute(workflow=workload.name())
        self.assertEqual([207214, 207215], runList)

        # GO to DQM GUI, get what you want
        # https://cmsweb.cern.ch/dqm/offline/jsonfairy/archive/211313/PAMuon/HIRun2013-PromptReco-v1/DQM/DQM/TimerService/event
        for dataset in interestingDatasets:
            (nothing, PD, procDataSet, dataTier) = dataset.split('/')
            worthPoints = {}
            for run in runList:
                responseJSON = self.getPerformanceFromDQM(
                    dqmUrl, dataset, run)
                worthPoints.update(
                    self.filterInterestingPerfPoints(responseJSON,
                                                     perfDashBoardMinLumi,
                                                     perfDashBoardMaxLumi))

            # Publish dataset performance to DashBoard.
            if not self.publishPerformanceDashBoard(dashBoardUrl, PD, release,
                                                    worthPoints):
                logging.info(
                    "something went wrong when publishing dataset %s to DashBoard",
                    dataset)

        return
class StepChainTests(unittest.TestCase):
    """
    _StepChainTests_

    Tests the StepChain spec file
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("stepchain_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("stepchain_t")
        self.testInit.generateWorkDir()
        self.workload = None
        # Template request used by the MC tests below.
        self.jsonTemplate = getTestFile(
            'data/ReqMgr/requests/DMWM/StepChain_MC.json')
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testStepChainSingleStep(self):
        """
        Build a StepChain single step, reading AODSIM and producing MINIAODSIM
        """
        testArguments = StepChainWorkloadFactory.getTestArguments()
        request = {
            "Campaign": "TaskForceUnitTest",
            "CMSSWVersion": "CMSSW_7_5_0",
            "ScramArch": "slc6_amd64_gcc491",
            "DbsUrl": "https://cmsweb.cern.ch/dbs/prod/global/DBSReader",
            "GlobalTag": "PHYS14_25_V3",
            "AcquisitionEra": "SingleStep",
            "ProcessingString": "UnitTest_StepChain",
            "ProcessingVersion": 3,
            "PrepID": "MainStep",
            "CouchURL": os.environ["COUCHURL"],
            "CouchDBName": "stepchain_t",
            "Memory": 3500,
            "SizePerEvent": 2600,
            "TimePerEvent": 26.5,
            "Step1": {
                "ConfigCacheID": injectStepChainConfigSingle(self.configDatabase),
                "GlobalTag": "PHYS14_25_V44",
                "InputDataset": "/RSGravToGG_kMpl-01_M-5000_TuneCUEP8M1_13TeV-pythia8/RunIISpring15DR74-Asympt50ns_MCRUN2_74_V9A-v1/AODSIM",
                "SplittingAlgo": "EventAwareLumiBased",
                "EventsPerJob": 500,
                "StepName": "StepMini"},
            "StepChain": 1
        }
        testArguments.update(request)

        factory = StepChainWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", testArguments)

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "StepMini",
                                    "GravWhatever",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # test workload properties
        self.assertEqual(testWorkload.getDashboardActivity(), "processing")
        self.assertEqual(testWorkload.getCampaign(), "TaskForceUnitTest")
        self.assertEqual(testWorkload.getAcquisitionEra(), "SingleStep")
        self.assertEqual(testWorkload.getProcessingString(),
                         "UnitTest_StepChain")
        self.assertEqual(testWorkload.getProcessingVersion(), 3)
        self.assertEqual(testWorkload.getPrepID(), "MainStep")
        self.assertEqual(sorted(testWorkload.getCMSSWVersions()),
                         ['CMSSW_7_5_0'])
        self.assertEqual(testWorkload.data.policies.start.policyName, "Block")

        # test workload tasks and steps
        tasks = testWorkload.listAllTaskNames()
        self.assertEqual(len(tasks), 4)
        self.assertTrue('StepMiniMergeMINIAODSIMoutput' in tasks)

        task = testWorkload.getTask(tasks[0])
        self.assertEqual(task.taskType(), "Processing", "Wrong task type")

        splitParams = task.jobSplittingParameters()
        self.assertEqual(splitParams['algorithm'], "EventAwareLumiBased",
                         "Wrong job splitting algo")
        self.assertEqual(splitParams['events_per_job'], 500)
        self.assertTrue(splitParams['performance']['timePerEvent'] > 26.4)
        self.assertTrue(splitParams['performance']['sizePerEvent'] > 2599)
        self.assertTrue(
            splitParams['performance']['memoryRequirement'] == 3500)

        # test workload step stuff
        self.assertEqual(sorted(task.listAllStepNames()),
                         ['cmsRun1', 'logArch1', 'stageOut1'])
        self.assertEqual(task.getTopStepName(), 'cmsRun1')
        self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
        self.assertFalse(task.getInputStep(), "Wrong input step")
        outModsAndDsets = task.listOutputDatasetsAndModules()[0]
        self.assertEqual(outModsAndDsets['outputModule'], 'MINIAODSIMoutput')
        self.assertEqual(
            outModsAndDsets['outputDataset'],
            '/RSGravToGG_kMpl-01_M-5000_TuneCUEP8M1_13TeV-pythia8/SingleStep-UnitTest_StepChain-v3/MINIAODSIM'
        )
        self.assertEqual(task.getSwVersion(), 'CMSSW_7_5_0')
        self.assertEqual(task.getScramArch(), 'slc6_amd64_gcc491')
        step = task.getStep("cmsRun1")
        self.assertEqual(
            step.data.application.configuration.arguments.globalTag,
            'PHYS14_25_V44')

        return

    def testStepChainMC(self):
        """
        Build a StepChain workload starting from scratch
        """
        # Read in the request
        request = json.load(open(self.jsonTemplate))
        testArguments = request['createRequest']
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "stepchain_t"
        })

        configDocs = injectStepChainConfigMC(self.configDatabase)
        for s in ['Step1', 'Step2', 'Step3']:
            testArguments[s]['ConfigCacheID'] = configDocs[s]

        factory = StepChainWorkloadFactory()

        # test that we cannot stage out different samples with the same output module
        self.assertRaises(WMSpecFactoryException,
                          factory.factoryWorkloadConstruction,
                          "TestWorkload", testArguments)

        testArguments['Step2']['KeepOutput'] = False
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", testArguments)

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "ProdMinBias",
                                    "MCFakeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # test workload properties
        self.assertEqual(testWorkload.getDashboardActivity(), "production")
        self.assertEqual(testWorkload.getCampaign(), "Campaign-OVERRIDE-ME")
        self.assertEqual(testWorkload.getAcquisitionEra(),
                         "CMSSW_7_0_0_pre11")
        self.assertEqual(testWorkload.getProcessingString(), "START70_V4")
        self.assertEqual(testWorkload.getProcessingVersion(), 1)
        self.assertEqual(testWorkload.getPrepID(), "Step-00")
        self.assertEqual(sorted(testWorkload.getCMSSWVersions()),
                         ['CMSSW_7_0_0_pre11', 'CMSSW_7_0_0_pre12'])

        # test workload attributes
        self.assertEqual(testWorkload.processingString, "START70_V4")
        self.assertEqual(testWorkload.acquisitionEra, "CMSSW_7_0_0_pre11")
        self.assertEqual(testWorkload.processingVersion, 1)
        self.assertFalse(testWorkload.lumiList, "Wrong lumiList")
        self.assertEqual(testWorkload.data.policies.start.policyName,
                         "MonteCarlo")

        # test workload tasks and steps
        tasks = testWorkload.listAllTaskNames()
        self.assertEqual(len(tasks), 10)
        for t in ['ProdMinBias', 'ProdMinBiasMergeRAWSIMoutput',
                  'RECOPROD1MergeAODSIMoutput',
                  'RECOPROD1MergeRECOSIMoutput']:
            self.assertTrue(t in tasks, "Wrong task name")
        self.assertFalse('ProdMinBiasMergeAODSIMoutput' in tasks,
                         "Wrong task name")

        task = testWorkload.getTask(tasks[0])
        self.assertEqual(task.name(), "ProdMinBias")
        self.assertEqual(task.getPathName(), "/TestWorkload/ProdMinBias")
        self.assertEqual(task.taskType(), "Production", "Wrong task type")

        splitParams = task.jobSplittingParameters()
        self.assertEqual(splitParams['algorithm'], "EventBased",
                         "Wrong job splitting algo")
        self.assertEqual(splitParams['events_per_job'], 150)
        self.assertEqual(splitParams['events_per_lumi'], 50)
        self.assertFalse(splitParams['lheInputFiles'], "Wrong LHE flag")
        self.assertTrue(splitParams['performance']['timePerEvent'] > 4.75)
        self.assertTrue(splitParams['performance']['sizePerEvent'] > 1233)
        self.assertTrue(
            splitParams['performance']['memoryRequirement'] == 2400)

        self.assertFalse(task.getTrustSitelists().get('trustlists'),
                         "Wrong input location flag")
        self.assertFalse(task.inputRunWhitelist(), "Wrong run white list")

        # test workload step stuff
        self.assertEqual(
            sorted(task.listAllStepNames()),
            ['cmsRun1', 'cmsRun2', 'cmsRun3', 'logArch1', 'stageOut1'])
        self.assertEqual(task.getTopStepName(), 'cmsRun1')
        self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
        self.assertFalse(task.getInputStep(), "Wrong input step")
        outModsAndDsets = task.listOutputDatasetsAndModules()
        outMods = set([elem['outputModule'] for elem in outModsAndDsets])
        outDsets = [elem['outputDataset'] for elem in outModsAndDsets]
        self.assertEqual(
            outMods, set(['RAWSIMoutput', 'AODSIMoutput', 'RECOSIMoutput']),
            "Wrong output modules")
        self.assertTrue(
            '/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterA-START70_V4-v1/GEN-SIM'
            in outDsets)
        self.assertTrue(
            '/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterD-START70_V4-v1/AODSIM'
            in outDsets)
        self.assertTrue(
            '/RelValProdMinBias/CMSSW_7_0_0_pre11-FilterC-START70_V4-v1/GEN-SIM-RECO'
            in outDsets)
        self.assertEqual(task.getSwVersion(), 'CMSSW_7_0_0_pre12')
        self.assertEqual(task.getScramArch(), 'slc5_amd64_gcc481')

        # cmsRun1: top step, keeps its GEN-SIM output, feeds cmsRun2.
        step = task.getStep("cmsRun1")
        self.assertFalse(step.data.tree.parent)
        self.assertFalse(getattr(step.data.input, 'inputStepName', None))
        self.assertFalse(getattr(step.data.input, 'inputOutputModule', None))
        self.assertEqual(step.data.output.modules.RAWSIMoutput.filterName,
                         'FilterA')
        self.assertEqual(step.data.output.modules.RAWSIMoutput.dataTier,
                         'GEN-SIM')
        self.assertTrue(step.data.output.keep)
        self.assertEqual(sorted(step.data.tree.childNames),
                         ['cmsRun2', 'logArch1', 'stageOut1'])
        self.assertEqual(step.data.application.setup.cmsswVersion,
                         'CMSSW_7_0_0_pre12')
        self.assertEqual(step.data.application.setup.scramArch,
                         'slc5_amd64_gcc481')
        self.assertEqual(
            step.data.application.configuration.arguments.globalTag,
            'START70_V4::All')

        # cmsRun2: reads cmsRun1's RAWSIMoutput, output not kept.
        step = task.getStep("cmsRun2")
        self.assertEqual(step.data.tree.parent, "cmsRun1")
        self.assertEqual(step.data.input.inputStepName, 'cmsRun1')
        self.assertEqual(step.data.input.inputOutputModule, 'RAWSIMoutput')
        self.assertEqual(step.data.output.modules.RAWSIMoutput.filterName,
                         'FilterB')
        self.assertEqual(step.data.output.modules.RAWSIMoutput.dataTier,
                         'GEN-SIM-RAW')
        self.assertFalse(step.data.output.keep)
        self.assertEqual(step.data.tree.childNames, ["cmsRun3"])
        self.assertEqual(step.data.application.setup.cmsswVersion,
                         'CMSSW_7_0_0_pre11')
        self.assertEqual(step.data.application.setup.scramArch,
                         'slc5_amd64_gcc481')
        self.assertEqual(
            step.data.application.configuration.arguments.globalTag,
            'START70_V4::All')

        # cmsRun3: last step, two kept output modules, no children.
        step = task.getStep("cmsRun3")
        self.assertEqual(step.data.tree.parent, "cmsRun2")
        self.assertEqual(step.data.input.inputStepName, 'cmsRun2')
        self.assertEqual(step.data.input.inputOutputModule, 'RAWSIMoutput')
        self.assertEqual(step.data.output.modules.RECOSIMoutput.filterName,
                         'FilterC')
        self.assertEqual(step.data.output.modules.AODSIMoutput.filterName,
                         'FilterD')
        self.assertEqual(step.data.output.modules.RECOSIMoutput.dataTier,
                         'GEN-SIM-RECO')
        self.assertEqual(step.data.output.modules.AODSIMoutput.dataTier,
                         'AODSIM')
        self.assertTrue(step.data.output.keep)
        self.assertFalse(step.data.tree.childNames)
        self.assertEqual(step.data.application.setup.cmsswVersion,
                         'CMSSW_7_0_0_pre11')
        self.assertEqual(step.data.application.setup.scramArch,
                         'slc5_amd64_gcc481')
        self.assertEqual(
            step.data.application.configuration.arguments.globalTag,
            'START70_V4::All')

        return

    def testStepMapping(self):
        """
        Build a mapping of steps, input and output modules
        """
        factory = StepChainWorkloadFactory()
        request = json.load(open(self.jsonTemplate))
        testArguments = request['createRequest']
        # Create a new DIGI step in Step3 and shift Step3 to Step4
        testArguments['Step4'] = copy(testArguments['Step3'])
        testArguments['Step3'] = {
            "GlobalTag": "START70_V4::All",
            "InputFromOutputModule": "RAWSIMoutput",
            "InputStep": "ProdMinBias",
            "StepName": "DIGIPROD2"
        }
        testArguments['StepChain'] = 4
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "stepchain_t"
        })

        configDocs = injectStepChainConfigMC(self.configDatabase)
        for s in ['Step1', 'Step2', 'Step3', 'Step4']:
            testArguments[s]['ConfigCacheID'] = configDocs[s]
            testArguments[s]['KeepOutput'] = False
        # docs are in the wrong order for this case
        testArguments['Step3']['ConfigCacheID'] = configDocs['Step4']
        testArguments['Step4']['ConfigCacheID'] = configDocs['Step3']

        expectedTasks = set([
            'ProdMinBias', 'RECOPROD1MergeAODSIMoutput',
            'RECOPROD1MergeRECOSIMoutput',
            'RECOPROD1AODSIMoutputMergeLogCollect',
            'RECOPROD1RECOSIMoutputMergeLogCollect',
            'RECOPROD1CleanupUnmergedAODSIMoutput',
            'RECOPROD1CleanupUnmergedRECOSIMoutput'
        ])
        expectedSteps = set([
            'cmsRun1', 'cmsRun2', 'cmsRun3', 'cmsRun4', 'stageOut1',
            'logArch1'
        ])

        # With every step's KeepOutput False the spec must be rejected.
        self.assertRaises(WMSpecFactoryException,
                          factory.factoryWorkloadConstruction,
                          "TestWorkload", testArguments)
        testArguments['Step4']['KeepOutput'] = True
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", testArguments)

        self.assertEqual(len(testWorkload.listAllTaskNames()),
                         len(expectedTasks))
        self.assertEqual(set(testWorkload.listAllTaskNames()), expectedTasks)
        task = testWorkload.getTask('ProdMinBias')
        self.assertEqual(set(task.listAllStepNames()), expectedSteps)

        step1 = task.getStep('cmsRun1')
        stepInputSection = step1.data.input.dictionary_()
        self.assertFalse('inputStepName' in stepInputSection)
        self.assertEqual(set(step1.data.output.modules.dictionary_().keys()),
                         set(['RAWSIMoutput']))
        self.assertEqual(
            step1.data.output.modules.RAWSIMoutput.dictionary_()['dataTier'],
            'GEN-SIM')

        step2 = task.getStep('cmsRun2')
        stepInputSection = step2.data.input.dictionary_()
        self.assertTrue(set(stepInputSection['inputStepName']), 'cmsRun1')
        self.assertTrue(set(stepInputSection['inputOutputModule']),
                        'RAWSIMoutput')
        self.assertEqual(set(step2.data.output.modules.dictionary_().keys()),
                         set(['RAWSIMoutput']))
        self.assertEqual(
            step2.data.output.modules.RAWSIMoutput.dictionary_()['dataTier'],
            'GEN-SIM-RAW')

        step3 = task.getStep('cmsRun3')
        stepInputSection = step3.data.input.dictionary_()
        self.assertTrue(set(stepInputSection['inputStepName']), 'cmsRun1')
        self.assertTrue(set(stepInputSection['inputOutputModule']),
                        'RAWSIMoutput')
        self.assertEqual(set(step3.data.output.modules.dictionary_().keys()),
                         set(['RAWSIMoutput']))
        self.assertEqual(
            step3.data.output.modules.RAWSIMoutput.dictionary_()['dataTier'],
            'GEN-SIM-RAW')

        step4 = task.getStep('cmsRun4')
        stepInputSection = step4.data.input.dictionary_()
        self.assertTrue(set(stepInputSection['inputStepName']), 'cmsRun2')
        self.assertTrue(set(stepInputSection['inputOutputModule']),
                        'RAWSIMoutput')
        self.assertEqual(set(step4.data.output.modules.dictionary_().keys()),
                         set(['AODSIMoutput', 'RECOSIMoutput']))
        self.assertEqual(
            step4.data.output.modules.AODSIMoutput.dictionary_()['dataTier'],
            'AODSIM')
class PromptRecoTest(unittest.TestCase):
    """
    Unit tests for the PromptReco workload factory: verifies that a
    constructed PromptReco workload installs the expected tasks, filesets
    and subscriptions into WMBS.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("promptreco_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("promptreco_t")
        self.testDir = self.testInit.generateWorkDir()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
        self.listFilesets = self.daoFactory(classname="Fileset.List")
        self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")
        self.promptSkim = None
        if PY3:
            # assertItemsEqual was renamed to assertCountEqual in Python 3
            self.assertItemsEqual = self.assertCountEqual
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def setupPromptSkimConfigObject(self):
        """
        _setupPromptSkimConfigObject_

        Creates a custom config object for testing
        of the skim functionality
        """
        self.promptSkim = ConfigSection(name="Tier1Skim")
        self.promptSkim.SkimName = "TestSkim1"
        self.promptSkim.DataTier = "RECO"
        self.promptSkim.TwoFileRead = False
        self.promptSkim.ProcessingVersion = "PromptSkim-v1"
        self.promptSkim.ConfigURL = "http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/CMSSW/Configuration/DataOps/python/prescaleskimmer.py?revision=1.1"

    # def testPromptReco(self):
    #     """
    #     _testPromptReco_
    #
    #     Create a Prompt Reconstruction workflow
    #     and verify it installs into WMBS correctly.
    #     """

    def testPromptRecoWithSkims(self):
        """
        _testT1PromptRecoWithSkim_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        # Top level Reco workflow: one output fileset per write tier plus logArchive
        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO",
                            "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            # ALCARECO is consumed by the AlcaSkim task, so it has no merge task of its own
            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # AlcaSkim workflow: one output fileset per skim plus logArchive
        alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            fset = goldenOutputMod + "ALCARECO"
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # DQM harvesting workflow
        dqmWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()
        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Merge workflows for the Reco data tiers
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap), 2,
                             "Error: Wrong number of WF outputs.")
            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            # for merge tasks both output filesets point at the merged fileset
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")
            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Merge workflows for the ALCARECO skims
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap))
            mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")
            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Subscriptions: top-level Reco processing
        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()
        recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
        recoSubscription.loadData()
        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        # AlcaSkim processing subscription on the unmerged ALCARECO output
        alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
        alcaRecoFileset.loadData()
        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()
        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        # DQM harvesting subscription on the merged DQM output
        mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
        mergedDQMFileset.loadData()
        dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
        dqmSubscription.loadData()
        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        # Merge subscriptions for the Reco data tiers
        unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for unmergedOutput, tier in viewitems(unmergedOutputs):
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Merge subscriptions for the ALCARECO skims
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions for the Reco data tiers
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            # fixed assertion message: this checks the split algorithm, not the type
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        # Cleanup subscriptions for the ALCARECO skims
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            # fixed assertion message: this checks the split algorithm, not the type
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        # LogCollect subscription for the Reco task
        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # LogCollect subscription for the AlcaSkim task
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # LogCollect subscriptions for the Reco merge tasks
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                       goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                                  workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()
            # FIX: assert on the subscription just loaded; the original checked
            # the stale logCollectSub variable from an earlier block, so these
            # merge LogCollect subscriptions were never actually verified.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        # LogCollect subscriptions for the AlcaSkim merge tasks
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                      goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                                  workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()
            # FIX: same stale-variable bug as above — assert on logCollectSubscription
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        # LogCollect subscription for the DQM harvesting task
        dqmHarvestLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                                task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
        return

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory setings are properly propagated to
        all tasks and steps.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test default values
        taskPaths = ['/TestWorkload/Reco', '/TestWorkload/Reco/AlcaSkim']
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        testArguments["Multicore"] = 6
        testArguments["Memory"] = 4600.0
        testArguments["EventStreams"] = 3
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if task == '/TestWorkload/Reco' and step == 'cmsRun1':
                    # only the top-level cmsRun step is multicore/multistream
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments["Multicore"])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments["EventStreams"])
                elif step in ('stageOut1', 'logArch1'):
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1,
                                     "%s should be single-core" % task)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], testArguments["Memory"])

        return

    def testFilesets(self):
        """
        Test workflow tasks, filesets and subscriptions creation
        """
        # expected tasks, filesets, subscriptions, etc
        expOutTasks = ['/TestWorkload/Reco',
                       '/TestWorkload/Reco/AlcaSkim',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                       '/TestWorkload/Reco/RecoMergewrite_AOD',
                       '/TestWorkload/Reco/RecoMergewrite_DQM',
                       '/TestWorkload/Reco/RecoMergewrite_RECO']
        expWfTasks = ['/TestWorkload/Reco',
                      '/TestWorkload/Reco/AlcaSkim',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect',
                      '/TestWorkload/Reco/LogCollect',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO',
                      '/TestWorkload/Reco/RecoMergewrite_AOD',
                      '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect',
                      '/TestWorkload/Reco/RecoMergewrite_DQM',
                      '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged',
                      '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect',
                      '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect',
                      '/TestWorkload/Reco/RecoMergewrite_RECO',
                      '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect']
        expFsets = ['TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-MergedALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                    '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-MergedALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-MergedALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/unmerged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_AOD/merged-MergedAOD',
                    '/TestWorkload/Reco/unmerged-write_AODAOD',
                    '/TestWorkload/Reco/unmerged-write_DQMDQM',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_RECO/merged-MergedRECO',
                    '/TestWorkload/Reco/unmerged-logArchive',
                    '/TestWorkload/Reco/unmerged-write_RECORECO']
        subMaps = [(5,
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (8,
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (11,
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (3,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (4,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (6,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (7,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (9,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (10,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (12,
                    '/TestWorkload/Reco/AlcaSkim/unmerged-logArchive',
                    '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (16,
                    '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (21,
                    '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (19,
                    '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged',
                    'Harvest',
                    'Harvesting'),
                   (20,
                    '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (24,
                    '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive',
                    '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (25,
                    '/TestWorkload/Reco/unmerged-logArchive',
                    '/TestWorkload/Reco/LogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (2,
                    '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                    '/TestWorkload/Reco/AlcaSkim',
                    'ParentlessMergeBySize',
                    'Processing'),
                   (13,
                    '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO',
                    '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (14,
                    '/TestWorkload/Reco/unmerged-write_AODAOD',
                    '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (15,
                    '/TestWorkload/Reco/unmerged-write_AODAOD',
                    '/TestWorkload/Reco/RecoMergewrite_AOD',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (17,
                    '/TestWorkload/Reco/unmerged-write_DQMDQM',
                    '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (18,
                    '/TestWorkload/Reco/unmerged-write_DQMDQM',
                    '/TestWorkload/Reco/RecoMergewrite_DQM',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (22,
                    '/TestWorkload/Reco/unmerged-write_RECORECO',
                    '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (23,
                    '/TestWorkload/Reco/unmerged-write_RECORECO',
                    '/TestWorkload/Reco/RecoMergewrite_RECO',
                    'ParentlessMergeBySize',
                    'Merge'),
                   (1,
                    'TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW',
                    '/TestWorkload/Reco',
                    'EventAwareLumiBased',
                    'Processing')]

        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnableHarvesting"] = True

        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        testWMBSHelper = WMBSHelper(testWorkload, "Reco",
                                    blockName=testArguments['InputDataset'],
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

        workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
        self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

        # returns a tuple of id, name, open and last_update
        filesets = self.listFilesets.execute()
        self.assertItemsEqual([item[1] for item in filesets], expFsets)

        subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
        self.assertItemsEqual(subscriptions, subMaps)

    def testallowCreationFailureArgExists(self):
        """
        Test allowCreationFailure arguments exists.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        factory.procJobSplitArgs["allowCreationFailure"] = False
        self.assertItemsEqual(factory.procJobSplitArgs,
                              {'events_per_job': 500,
                               'allowCreationFailure': False,
                               'job_time_limit': 345600})
class CMSSWFetcherTest(unittest.TestCase):
    """
    Main test for the URLFetcher
    """

    def setUp(self):
        """
        Basic setUp
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        Basic tearDown
        """
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _getConfig_

        Create a test config and put it in the cache
        """
        # minimal tweak payload; only its presence matters for the fetcher
        tweak = {'someKey': "Hello, I am a PSetTweak. It's nice to meet you."}

        cache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        cache.createUserGroup(groupname="testGroup", username='******')
        cache.setPSetTweaks(PSetTweak=tweak)
        cache.attachments['configFile'] = 'This Is A Test Config'
        cache.save()
        return cache

    def createTask(self, configCache):
        """
        _createTask_

        Create a test task that includes the fileURL
        """
        wmTask = WMTask.makeWMTask("testTask")
        wmTask.makeStep("step1")
        wmTask.makeStep("step2")

        # point every step at the cached config and give each a work area
        for node in wmTask.steps().nodeIterator():
            helper = WMStep.WMStepHelper(node)
            os.mkdir(os.path.join(self.testDir, helper.name()))
            helper.setStepType("CMSSW")
            helper.data.application.section_('command')
            helper.data.application.configuration.configCacheUrl = configCache.dburl
            helper.data.application.configuration.cacheName = configCache.dbname
            helper.data.application.configuration.configId = configCache.getCouchID()
            helper.data.application.command.psetTweak = 'tweak'
            helper.data.application.command.configuration = 'configCache.file'

        return wmTask

    def testA_BasicFunction(self):
        """
        _BasicFunction_

        Run a test to find out if we can grab a configCache
        """
        cache = self.getConfig()
        wmTask = self.createTask(configCache=cache)

        fetcher = CMSSWFetcher()
        fetcher.setWorkingDirectory(workingDir=self.testDir)
        self.assertEqual(fetcher.workingDirectory(), self.testDir)

        # run the fetcher over the task; it should drop the config into each step dir
        fetcher(wmTask=wmTask)

        target = os.path.join(self.testDir, 'step2', 'configCache.file')
        self.assertTrue(os.path.isfile(target))
        with open(target) as handle:
            fetched = handle.read()
        self.assertEqual(fetched, 'This Is A Test Config')
        return
class RepackTests(unittest.TestCase):
    """
    _RepackTests_

    Verify that the Tier-0 Repack workload installs into WMBS correctly:
    workflows, output filesets, subscriptions, and the splitting /
    performance settings propagated to the tasks and steps.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        self.testDir = self.testInit.generateWorkDir()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        # DAOs used by testFilesets to inspect what got installed in WMBS.
        self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
        self.listFilesets = self.daoFactory(classname="Fileset.List")
        self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testRepack(self):
        """
        _testRepack_

        Create a Repack workflow and verify it installs into WMBS correctly.
        """
        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        repackWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Repack")
        repackWorkflow.load()
        # One output fileset per configured output module plus logArchive.
        self.assertEqual(len(repackWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                         "Error: Wrong number of WF outputs in the Repack WF.")

        goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW", "write_PrimaryDataset2_RAW": "RAW"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_PrimaryDataset1_RAW":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Repack/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Verify each merge task's output map: Merged<tier> plus logArchive.
        for goldenOutputMod, tier in goldenOutputMods.items():
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 3,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Top-level Repack subscription.
        topLevelFileset = Fileset(name="TestWorkload-Repack")
        topLevelFileset.loadData()
        repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
        repackSubscription.loadData()
        self.assertEqual(repackSubscription["type"], "Repack",
                         "Error: Wrong subscription type.")
        self.assertEqual(repackSubscription["split_algo"], "Repack",
                         "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

        # Merge subscriptions on the unmerged output filesets.
        unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW", "write_PrimaryDataset2_RAW": "RAW"}
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions on the unmerged output filesets.
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        # Top-level LogCollect subscription.
        repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
        repackLogCollect.loadData()
        repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Repack/LogCollect")
        repackLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=repackLogCollect, workflow=repackLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # Per-merge-task LogCollect subscriptions.
        for goldenOutputMod, tier in goldenOutputMods.items():
            repackMergeLogCollect = Fileset(
                name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
            repackMergeLogCollect.loadData()
            repackMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                     task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" % (
                                                         goldenOutputMod, goldenOutputMod))
            repackMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                                  workflow=repackMergeLogCollectWorkflow)
            logCollectSubscription.loadData()
            # Bug fix: these assertions previously re-checked the stale
            # `logCollectSub` object from the top-level LogCollect block
            # above, so the per-merge-task subscriptions were never
            # actually verified.  Assert on the subscription just loaded.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")
        return

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory setings are properly propagated to
        all tasks and steps.
        """
        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))
        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test default values
        taskPaths = ['/TestWorkload/Repack']
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        testArguments["Multicore"] = 6
        testArguments["Memory"] = 4600.0
        testArguments["EventStreams"] = 3
        testArguments["Outputs"] = deepcopy(REQUEST['Outputs'])
        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if task == '/TestWorkload/Repack' and step == 'cmsRun1':
                    # Only the cmsRun step of the top-level task gets the
                    # multicore/streams settings.
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments["Multicore"])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments["EventStreams"])
                elif step in ('stageOut1', 'logArch1'):
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], testArguments["Memory"])
        return

    def testFilesets(self):
        """
        Test workflow tasks, filesets and subscriptions creation
        """
        # expected tasks, filesets, subscriptions, etc
        expOutTasks = ['/TestWorkload/Repack',
                       '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                       '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW']
        expWfTasks = ['/TestWorkload/Repack',
                      '/TestWorkload/Repack/LogCollect',
                      '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                      '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect']
        expFsets = ['TestWorkload-Repack-StreamerFiles',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedErrorRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedErrorRAW',
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/unmerged-logArchive']
        subMaps = [(3,
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (6,
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (8,
                    '/TestWorkload/Repack/unmerged-logArchive',
                    '/TestWorkload/Repack/LogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (4,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (2,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                    'RepackMerge',
                    'Merge'),
                   (7,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (5,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                    'RepackMerge',
                    'Merge'),
                   (1,
                    'TestWorkload-Repack-StreamerFiles',
                    '/TestWorkload/Repack',
                    'Repack',
                    'Repack')]

        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        testWMBSHelper = WMBSHelper(testWorkload, "Repack", blockName='StreamerFiles',
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

        workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
        self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

        # returns a tuple of id, name, open and last_update
        filesets = self.listFilesets.execute()
        self.assertItemsEqual([item[1] for item in filesets], expFsets)

        subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True)
        self.assertItemsEqual(subscriptions, subMaps)
class ReportTest(unittest.TestCase):
    """
    _ReportTest_

    Unit tests for the Report class.

    Parses the canned CMSSW framework job report XML fixtures shipped with
    the test suite and checks the resulting Report object: input/output file
    metadata, errors, performance numbers, JSON encoding and persistence.
    """

    def setUp(self):
        """
        _setUp_

        Figure out the location of the XML report produced by CMSSW.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setupCouch("report_t/fwjrs", "FWJRDump")

        # Fixture job reports: one good processing report, one malformed.
        self.xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                                    "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        self.badxmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                                       "WMCore_t/FwkJobReport_t/CMSSWFailReport2.xml")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        _tearDown_

        Cleanup the databases.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def verifyInputData(self, report):
        """
        _verifyInputData_

        Verify that the input file in the Report class matches the
        input file in the XML generated by CMSSW.
        """
        inputFiles = report.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 1, \
            "Error: Wrong number of input files."
        assert inputFiles[0]["lfn"] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong LFN on input file."
        assert inputFiles[0]["pfn"] == "dcap://cmsdca.fnal.gov:24137/pnfs/fnal.gov/usr/cms/WAX/11/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong PFN on input file."

        # Run/lumi metadata: one run (122023) with a single lumi (215).
        inputRun = list(inputFiles[0]["runs"])
        assert len(inputRun) == 1, \
            "Error: Wrong number of runs in input."
        assert inputRun[0].run == 122023, \
            "Error: Wrong run number on input file."
        assert len(inputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in input file."
        assert 215 in inputRun[0].lumis, \
            "Error: Input file is missing lumis."

        assert inputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in input file."
        assert inputFiles[0]["size"] == 0, \
            "Error: Wrong size in input file."

        assert inputFiles[0]["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
            "Error: Catalog on input file is wrong."
        assert inputFiles[0]["guid"] == "142F3F42-C5D6-DE11-945D-000423D94494", \
            "Error: GUID of input file is wrong."
        return

    def verifyRecoOutput(self, report):
        """
        _verifyRecoOutput_

        Verify that all the metadata in the RECO output module is
        correct.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1", "outputRECORECO")

        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputRECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."

        # Output must reference the single input LFN it was produced from.
        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."

        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "7E3359C8-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputRECORECO", \
            "Error: Module label of output file is wrong."
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def verifyAlcaOutput(self, report):
        """
        _verifyAlcaOutput_

        Verify that all of the meta data in the ALCARECO output module is
        correct.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1", "outputALCARECORECO")

        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/ALCARECO/rereco_GR09_R_34X_V5_All_v1/0000/B8F849C9-222E-DF11-B2B0-001731230E47.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputALCARECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."

        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."

        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "B8F849C9-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputALCARECORECO", \
            "Error: Module label of output file is wrong."
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def testXMLParsing(self):
        """
        _testParsing_

        Verify that the parsing of a CMSSW XML report works correctly.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        self.verifyInputData(myReport)
        self.verifyRecoOutput(myReport)
        self.verifyAlcaOutput(myReport)
        return

    def testBadXMLParsing(self):
        """
        _testBadXMLParsing_

        Verify that the parsing of a CMSSW XML report works correctly even if
        the XML is malformed.

        This should raise a FwkJobReportException, which in CMSSW
        will be caught
        """
        myReport = Report("cmsRun1")

        from WMCore.FwkJobReport.Report import FwkJobReportException
        self.assertRaises(FwkJobReportException, myReport.parse, self.badxmlPath)
        # 50115 is the exit code assigned to a BadFWJRXML failure.
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].type, 'BadFWJRXML')
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].exitCode, 50115)
        return

    def testErrorReporting(self):
        """
        _testErrorReporting_

        Verify that errors are correctly transfered from the XML report to
        the python report.
        """
        # NOTE(review): the internal line breaks of this literal were lost in
        # a whitespace-mangled paste and have been reconstructed here from
        # the CMSSW exception-print format — verify byte-for-byte against the
        # CMSSWFailReport.xml fixture before relying on the details assert.
        cmsException = \
"""cms::Exception caught in cmsRun
---- EventProcessorFailure BEGIN
EventProcessingStopped
---- ScheduleExecutionFailure BEGIN
ProcessingStopped
---- NoRecord BEGIN
No "CastorDbRecord" record found in the EventSetup.
 Please add an ESSource or ESProducer that delivers such a record.
cms::Exception going through module CastorRawToDigi/castorDigis run: 121849 lumi: 1 event: 23
---- NoRecord END
Exception going through path raw2digi_step
---- ScheduleExecutionFailure END
an exception occurred during current event processing
cms::Exception caught in EventProcessor and rethrown
---- EventProcessorFailure END"""

        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWFailReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1, "errors"), \
            "Error: Error section missing."
        assert getattr(myReport.data.cmsRun1.errors, "errorCount") == 1, \
            "Error: Error count is wrong."
        assert hasattr(myReport.data.cmsRun1.errors, "error0"), \
            "Error: Error0 section is missing."
        assert myReport.data.cmsRun1.errors.error0.type == "CMSException", \
            "Error: Wrong error type."
        # Exit codes from the XML come through as strings at this level.
        assert myReport.data.cmsRun1.errors.error0.exitCode == "8001", \
            "Error: Wrong exit code."
        assert myReport.data.cmsRun1.errors.error0.details == cmsException, \
            "Error: Error details are wrong:\n|%s|\n|%s|" % (myReport.data.cmsRun1.errors.error0.details,
                                                             cmsException)

        # Test getStepErrors
        self.assertEqual(myReport.getStepErrors("cmsRun1")['error0'].type, "CMSException")
        return

    def testMultipleInputs(self):
        """
        _testMultipleInputs_

        Verify that parsing XML reports with multiple inputs works
        correctly.
        """
        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWMultipleInput.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1.input, "source"), \
            "Error: Report missing input source."

        inputFiles = myReport.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 2, \
            "Error: Wrong number of input files."

        # Both files share the common metadata; they are told apart by GUID.
        for inputFile in inputFiles:
            assert inputFile["input_type"] == "primaryFiles", \
                "Error: Wrong input type."
            assert inputFile["module_label"] == "source", \
                "Error: Module label is wrong"
            assert inputFile["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
                "Error: Catalog is wrong."
            assert inputFile["events"] == 2, \
                "Error: Wrong number of events."
            assert inputFile["input_source_class"] == "PoolSource", \
                "Error: Wrong input source class."

            if inputFile["guid"] == "F0875ECD-3347-DF11-9FE0-003048678A80":
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 1 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
            else:
                assert inputFile["guid"] == "626D74CE-3347-DF11-9363-0030486790C0", \
                    "Error: Wrong guid."
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 2 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
        return

    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        jsonReport = myReport.__to_json__(None)

        assert "task" in jsonReport.keys(), \
            "Error: Task name missing from report."

        assert len(jsonReport["steps"].keys()) == 1, \
            "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"].keys(), \
            "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        jsonReportSections = ["status", "errors", "logs", "parameters", "site",
                              "analysis", "cleanup", "input", "output", "start"]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep.keys(), \
                "Error: missing section: %s" % jsonReportSection
        return

    def testTimeSetting(self):
        """
        _testTimeSetting_

        Can we set the times correctly?
        """
        stepName = "cmsRun1"
        # Tolerance (seconds) between time.time() here and in the report.
        timeDiff = 0.01
        myReport = Report(stepName)
        localTime = time.time()
        myReport.setStepStartTime(stepName)
        myReport.setStepStopTime(stepName)
        repTime = myReport.getTimes(stepName)
        self.assertTrue(repTime["startTime"] - localTime < timeDiff)
        self.assertTrue(repTime["stopTime"] - localTime < timeDiff)

        # getFirstStartLastStop should span the earliest start (step 1) to
        # the latest stop (step 3) across all steps.
        myReport = Report("cmsRun1")
        myReport.addStep("cmsRun2")
        myReport.addStep("cmsRun3")
        step = myReport.retrieveStep("cmsRun1")
        step.startTime = 1
        step.stopTime = 8
        step = myReport.retrieveStep("cmsRun2")
        step.startTime = 2
        step.stopTime = 9
        step = myReport.retrieveStep("cmsRun3")
        step.startTime = 3
        step.stopTime = 10
        self.assertEqual(myReport.getFirstStartLastStop()['stopTime'], 10)
        self.assertEqual(myReport.getFirstStartLastStop()['startTime'], 1)
        return

    def testTaskJobID(self):
        """
        _testTaskJobID_

        Test the basic task and jobID functions
        """
        report = Report('fake')
        self.assertEqual(report.getTaskName(), None)
        self.assertEqual(report.getJobID(), None)
        report.setTaskName('silly')
        report.setJobID(100)
        self.assertEqual(report.getTaskName(), 'silly')
        self.assertEqual(report.getJobID(), 100)
        return

    def test_PerformanceReport(self):
        """
        _PerformanceReport_

        Test the performance report part of the job report
        """
        report = Report("cmsRun1")
        report.setStepVSize(stepName="cmsRun1", min=100, max=800, average=244)
        report.setStepRSS(stepName="cmsRun1", min=100, max=800, average=244)
        report.setStepPCPU(stepName="cmsRun1", min=100, max=800, average=244)
        report.setStepPMEM(stepName="cmsRun1", min=100, max=800, average=244)

        # All four metrics were set with identical values, so every section
        # of the performance block should carry the same min/max/average.
        perf = report.retrieveStep("cmsRun1").performance
        for section in perf.dictionary_().values():
            d = section.dictionary_()
            self.assertEqual(d['min'], 100)
            self.assertEqual(d['max'], 800)
            self.assertEqual(d['average'], 244)
        return

    def testPerformanceSummary(self):
        """
        _testPerformanceSummary_

        Test whether or not we can pull performance information
        out of a Timing/SimpleMemoryCheck jobReport
        """
        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        # Do a brief check of the three sections
        perf = myReport.data.cmsRun1.performance

        self.assertEqual(perf.memory.PeakValueRss, '492.293')
        self.assertEqual(perf.cpu.TotalJobCPU, '9.16361')
        self.assertEqual(perf.storage.writeTotalMB, 5.22226)
        self.assertEqual(perf.storage.writeTotalSecs, 60317.4)
        self.assertEqual(perf.storage.readPercentageOps, 0.98585512216030857)
        return

    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly
        converted to JSON.
        """
        xmlPath = os.path.join(WMCore.WMBase.getTestBase(),
                               "WMCore_t/FwkJobReport_t/PerformanceReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(thunker=None)["steps"]["cmsRun1"]["performance"]

        self.assertTrue(perfSection.has_key("storage"),
                        "Error: Storage section is missing.")
        self.assertTrue(perfSection.has_key("memory"),
                        "Error: Memory section is missing.")
        self.assertTrue(perfSection.has_key("cpu"),
                        "Error: CPU section is missing.")

        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return

    def testExitCode(self):
        """
        _testExitCode_

        Test and see if we can get an exit code out of a report

        Note: Errors without a return code return 99999
        """
        report = Report("cmsRun1")
        self.assertEqual(report.getExitCode(), 0)
        report.addError(stepName="cmsRun1", exitCode=None, errorType="test", errorDetails="test")
        # A None exit code is mapped to the generic 99999.
        self.assertEqual(report.getExitCode(), 99999)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 99999)
        report.addError(stepName="cmsRun1", exitCode='12345', errorType="test", errorDetails="test")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 12345)

    def testProperties(self):
        """
        _testProperties_

        Test data fields for the properties information for DBS
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        name = "ThisIsASillyString"

        myReport.setValidStatus(name)
        myReport.setGlobalTag(name)
        myReport.setAcquisitionProcessing(acquisitionEra='NULL', processingVer=name)
        myReport.setInputDataset(inputPath='/lame/path')
        myReport.setCustodialSite(custodialSite='testCustody')

        # Setters above should have stamped every file in the step.
        for f in myReport.getAllFilesFromStep("cmsRun1"):
            self.assertEqual(f['globalTag'], name)
            self.assertEqual(f['validStatus'], name)
            self.assertEqual(f['processingVer'], name)
            self.assertEqual(f['acquisitionEra'], 'NULL')
            self.assertEqual(f['inputPath'], '/lame/path')
            self.assertEqual(f['custodialSite'], 'testCustody')
        return

    def testOutputFiles(self):
        """
        _testOutputFiles_

        Test some basic manipulation of output files
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        files = myReport.getAllFilesFromStep(step="cmsRun1")

        f1 = files[0]
        f2 = files[1]

        self.assertEqual(f1['outputModule'], 'outputRECORECO')
        self.assertEqual(f1['pfn'], 'outputRECORECO.root')

        self.assertEqual(f2['outputModule'], 'outputALCARECORECO')
        self.assertEqual(f2['pfn'], 'outputALCARECORECO.root')

        for f in files:
            self.assertEqual(f['events'], 2)
            self.assertEqual(f['configURL'], None)
            self.assertEqual(f['merged'], False)
            self.assertEqual(f['validStatus'], None)
            self.assertEqual(f['first_event'], 0)
        return

    def testGetAdlerChecksum(self):
        """
        _testGetAdlerChecksum_

        Test the function that sees if all files
        have an adler checksum.

        For some reason, our default XML report doesn't have checksums
        Therefore it should fail.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # 60451 is the exit code for a missing/invalid adler32 checksum.
        myReport.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertFalse(myReport.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport.getExitCode(), 60451)

        # Now see what happens if the adler32 is set to None
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': None}
        myReport2.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60451)

        # Any non-None adler32 value passes the check.
        myReport3 = Report("cmsRun1")
        myReport3.parse(self.xmlPath)
        fRefs = myReport3.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': 100}
        myReport3.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertTrue(myReport3.getExitCode() != 60451)
        return

    def testTaskSuccessful(self):
        """
        _testTaskSuccessful_

        Test whether or not the report marks the task successful
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # First, the report should fail
        self.assertFalse(myReport.taskSuccessful())

        # Second, if we ignore cmsRun, the task
        # should succeed
        self.assertTrue(myReport.taskSuccessful(ignoreString='cmsRun'))
        return

    def testMultiCoreReport(self):
        """
        _testMultiCoreReport_

        Verify that multicore reports can be json encoded and uploaded to couch.
        """
        couchdb = CouchServer(os.environ["COUCHURL"])
        fwjrdatabase = couchdb.connectDatabase("report_t/fwjrs")

        self.mcPath = os.path.join(WMCore.WMBase.getTestBase(),
                                   "WMCore_t/FwkJobReport_t/MulticoreReport.pkl")
        myReport = Report()
        myReport.unpersist(self.mcPath)

        fwjrDocument = {"_id": "303-0",
                        "jobid": 303,
                        "retrycount": 0,
                        "fwjr": myReport.__to_json__(None),
                        "type": "fwjr"}
        fwjrdatabase.queue(fwjrDocument, timestamp=True)
        fwjrdatabase.commit()
        return

    def testStripReport(self):
        """
        _testStripReport_

        Test whether or not we can strip input file information
        from a FWJR and create a smaller object.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        path1 = os.path.join(self.testDir, 'testReport1.pkl')
        path2 = os.path.join(self.testDir, 'testReport2.pkl')

        myReport.save(path1)
        info = BasicAlgos.getFileInfo(filename=path1)
        # NOTE(review): the pickle sizes asserted here (6821 / 5933) are tied
        # to the exact fixture and pickle protocol — brittle across versions.
        self.assertEqual(info['Size'], 6821)

        inputFiles = myReport.getAllInputFiles()
        self.assertEqual(len(inputFiles), 1)
        myReport.stripInputFiles()
        self.assertEqual(len(myReport.getAllInputFiles()), 0)

        myReport.save(path2)
        info = BasicAlgos.getFileInfo(filename=path2)
        self.assertEqual(info['Size'], 5933)
        return

    def testDuplicatStep(self):
        """
        _testDuplicateStep_

        If the same step is added twice, it should act
        as a replacement, and raise an appropriate message
        """
        baseReport = Report("cmsRun1")
        baseReport.parse(self.xmlPath)

        modReport = Report("cmsRun1")
        modReport.parse(self.xmlPath)
        setattr(modReport.data.cmsRun1, 'testVar', 'test01')

        report = Report()
        report.setStep(stepName='cmsRun1', stepSection=baseReport.retrieveStep('cmsRun1'))
        report.setStep(stepName='cmsRun1', stepSection=modReport.retrieveStep('cmsRun1'))

        # The second setStep replaces the first; testVar proves which won.
        self.assertEqual(report.listSteps(), ['cmsRun1'])
        self.assertEqual(report.data.cmsRun1.testVar, 'test01')
        return

    def testDeleteOutputModule(self):
        """
        _testDeleteOutputModule_

        If asked delete an output module, if it doesn't exist
        then do nothing
        """
        originalReport = Report("cmsRun1")
        originalReport.parse(self.xmlPath)

        self.assertTrue(originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
                        "Error: Report XML doesn't have the module for the test, invalid test")

        originalOutputModules = len(originalReport.retrieveStep("cmsRun1").outputModules)

        print originalReport.data
        originalReport.deleteOutputModuleForStep("cmsRun1", "outputALCARECORECO")
        print originalReport.data
        self.assertFalse(originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
                         "Error: The output module persists after deletion")
        self.assertEqual(len(originalReport.retrieveStep("cmsRun1").outputModules),
                         originalOutputModules - 1,
                         "Error: The number of output modules is incorrect after deletion")
class PileupFetcherTest(unittest.TestCase):
    """
    Tests that PileupFetcher produces a correct pileupconf.json file for
    every step of a workflow that carries a pileup configuration, by
    comparing the file contents against (emulated) DBS query results.
    """

    def setUp(self):
        """
        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("pileupfetcher_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("pileupfetcher_t")
        self.testDir = self.testInit.generateWorkDir()
        # DBS calls are emulated so the test does not hit a live server
        EmulatorHelper.setEmulators(dbs = True)

    def tearDown(self):
        """
        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        EmulatorHelper.resetEmulators()

    def injectGenerationConfig(self):
        """
        _injectGenerationConfig_

        Inject a generation config for the MC workflow.

        Returns the couch document id of the committed configuration.
        """
        config = Document()
        config["info"] = None
        config["config"] = None
        config["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        config["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        config["owner"] = {"group": "cmsdataops", "user": "******"}
        # NOTE: a redundant `config["pset_tweak_details"] = None` that was
        # immediately overwritten has been dropped.
        config["pset_tweak_details"] = \
            {"process": {"outputModules_": ["OutputA"],
                         "OutputA": {"dataset": {"filterName": "OutputAFilter",
                                                 "dataTier": "GEN-SIM-RAW"}}}}
        result = self.configDatabase.commitOne(config)
        return result[0]["id"]

    def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
        """
        pileupDict is a Python dictionary containing particular pileup
        configuration information. Query DBS on given dataset contained
        now in both input defaultArguments as well as in the pileupDict
        and compare values.
        """
        args = {}
        args["version"] = "DBS_2_0_9"
        args["mode"] = "GET"
        reader = DBSReader(dbsUrl, **args)

        inputArgs = defaultArguments["PileupConfig"]

        self.assertEqual(len(inputArgs), len(pileupDict),
                         "Number of pileup types different.")
        for pileupType in inputArgs:
            m = ("pileup type '%s' not in PileupFetcher-produced pileup "
                 "configuration: '%s'" % (pileupType, pileupDict))
            self.assertTrue(pileupType in pileupDict, m)

        # now query DBS for compare actual results on files lists for each
        # pileup type and dataset and location (storage element names)
        # pileupDict is saved in the file and now comparing items of this
        # configuration with actual DBS results, the structure of pileupDict:
        #   {"pileupTypeA": {"BlockA": {"FileList": [], "StorageElementNames": []},
        #                    "BlockB": {"FileList": [], "StorageElementName": []}, ....}
        for pileupType, datasets in inputArgs.items():
            # this is from the pileup configuration produced by PileupFetcher
            blockDict = pileupDict[pileupType]

            for dataset in datasets:
                dbsFileBlocks = reader.listFileBlocks(dataset = dataset)
                for dbsFileBlockName in dbsFileBlocks:
                    # list of files in the block (dbsFile["LogicalFileName"])
                    fileList = []
                    # set of StorageElementName for the block
                    storageElemNames = set()
                    # each DBS block has a list under 'StorageElementList', iterate over
                    storageElements = reader.listFileBlockLocation(dbsFileBlockName)
                    for storElem in storageElements:
                        storageElemNames.add(storElem)
                    # now get list of files in the block
                    dbsFiles = reader.listFilesInBlock(dbsFileBlockName)
                    for dbsFile in dbsFiles:
                        fileList.append(dbsFile["LogicalFileName"])
                    # now compare the sets:
                    m = ("StorageElementNames don't agree for pileup type '%s', "
                         "dataset '%s' in configuration: '%s'" %
                         (pileupType, dataset, pileupDict))
                    self.assertEqual(set(blockDict[dbsFileBlockName]["StorageElementNames"]),
                                     storageElemNames, m)
                    # NOTE: debug prints of the two file lists were removed and
                    # the message m is now actually passed to the assertion
                    # (it used to be built but never used).
                    m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                         " in configuration: '%s'" % (pileupType, dataset, pileupDict))
                    self.assertEqual(sorted(blockDict[dbsFileBlockName]["FileList"]),
                                     sorted(fileList), m)

    def _queryPileUpConfigFile(self, defaultArguments, task, taskPath):
        """
        Query and compare contents of the pileup JSON configuration files.
        Iterate over tasks's steps as it happens in the PileupFetcher.
        """
        for step in task.steps().nodeIterator():
            # returns e.g. instance of CMSSWHelper
            helper = WMStep.WMStepHelper(step)
            if hasattr(helper.data, "pileup"):
                decoder = JSONDecoder()
                stepPath = "%s/%s" % (taskPath, helper.name())
                pileupConfig = "%s/%s" % (stepPath, "pileupconf.json")
                try:
                    # context manager guarantees the handle is closed; the
                    # local was renamed from `json` to avoid shadowing the
                    # json module name
                    with open(pileupConfig, 'r') as f:
                        encodedDict = f.read()
                    pileupDict = decoder.decode(encodedDict)
                except IOError:
                    m = "Could not read pileup JSON configuration file: '%s'" % pileupConfig
                    self.fail(m)
                self._queryAndCompareWithDBS(pileupDict, defaultArguments,
                                             helper.data.dbsUrl)

    def testPileupFetcherOnMC(self):
        """
        Build a MonteCarlo workload carrying a pileup configuration, run
        PileupFetcher over each task and verify the produced JSON files.
        """
        pileupMcArgs = MonteCarloWorkloadFactory.getTestArguments()
        pileupMcArgs["PileupConfig"] = {"cosmics": ["/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"],
                                        "minbias": ["/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"]}
        pileupMcArgs["CouchURL"] = os.environ["COUCHURL"]
        pileupMcArgs["CouchDBName"] = "pileupfetcher_t"
        pileupMcArgs["ConfigCacheID"] = self.injectGenerationConfig()

        factory = MonteCarloWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", pileupMcArgs)

        # Since this is test of the fetcher - The loading from WMBS isn't
        # really necessary because the fetching happens before the workflow
        # is inserted into WMBS: feed the workload instance directly into fetcher:
        fetcher = PileupFetcher()
        creator = SandboxCreator()
        pathBase = "%s/%s" % (self.testDir, testWorkload.name())
        for topLevelTask in testWorkload.taskIterator():
            for taskNode in topLevelTask.nodeIterator():
                # this is how the call to PileupFetcher is happening
                # from the SandboxCreator test
                task = WMTask.WMTaskHelper(taskNode)
                taskPath = "%s/WMSandbox/%s" % (pathBase, task.name())
                fetcher.setWorkingDirectory(taskPath)
                # create Sandbox for the fetcher ...
                creator._makePathonPackage(taskPath)
                fetcher(task)
                self._queryPileUpConfigFile(pileupMcArgs, task, taskPath)
class Tier0PluginTest(unittest.TestCase):
    """
    Tests for the Tier0 WMStats plugin: builds Repack-, Express- and
    PromptReco-like workflows in WMBS and checks that the plugin moves the
    request document through the expected sequence of states.
    """

    def setUp(self):
        """
        _setUp_

        Setup the test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(["WMCore.WMBS"])
        self.requestCouchDB = 'wmstats_plugin_t'
        self.testInit.setupCouch(self.requestCouchDB, 'T0Request')
        self.testDir = self.testInit.generateWorkDir()
        reqDBURL = "%s/%s" % (os.environ['COUCHURL'], self.requestCouchDB)
        self.requestDBWriter = RequestDBWriter(reqDBURL, couchapp="T0Request")
        self.requestDBWriter._setNoStale()

        # stateMap maps a target state to the objects whose completion
        # triggers the transition; orderedStates is the traversal order.
        self.stateMap = {}
        self.orderedStates = []
        self.plugin = None

        return

    def tearDown(self):
        """
        _tearDown_

        Clear databases and delete files
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()

        return

    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW',
                      'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath,
                   'owner': 'ItsAMeMario',
                   'name': workflowName,
                   'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName, **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        # NOTE: the cleanup task path used to hard-code the workflow name;
        # it is now derived from workflowName like every other path here
        # (the resulting string is identical).
        cleanupWorkflow = Workflow(task='/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return

    def setupExpressWorkflow(self):
        """
        _setupExpressWorkflow_

        Populate WMBS with a express-like workflow,
        every subscription must be unfinished at first
        """
        workflowName = 'Express_Run481516_StreamZFast'
        secondLevelTasks = ['ExpressMergewrite_StreamZFast_DQM',
                            'ExpressMergewrite_ExpressPhysics_FEVT',
                            'ExpressAlcaSkimwrite_StreamZFast_ALCARECO',
                            'ExpressCleanupUnmergedwrite_StreamZFast_DQM',
                            'ExpressCleanupUnmergedwrite_ExpressPhysics_FEVT',
                            'ExpressCleanupUnmergedwrite_StreamZFast_ALCARECO']
        alcaHarvestTask = 'ExpressAlcaSkimwrite_StreamZFast_ALCARECOAlcaHarvestALCARECOStreamPromptCalibProd'
        dqmHarvestTask = 'ExpressMergewrite_StreamZFast_DQMEndOfRunDQMHarvestMerged'

        self.stateMap = {'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec in disk
        workload = newWorkload(workflowName)
        expressTask = workload.newTask('Express')
        for task in secondLevelTasks:
            secondLevelTask = expressTask.addTask(task)
            if task == 'ExpressAlcaSkimwrite_StreamZFast_ALCARECO':
                secondLevelTask.addTask(alcaHarvestTask)
            elif task == 'ExpressMergewrite_StreamZFast_DQM':
                secondLevelTask.addTask(dqmHarvestTask)

        specPath = os.path.join(self.testDir, 'Express.pkl')
        workload.save(specPath)

        # Populate WMBS
        sharedFileset = Fileset(name='TestFileset')
        sharedFileset.create()
        sharedFileset.markOpen(False)

        options = {'spec': specPath,
                   'owner': 'ItsAMeMario',
                   'name': workflowName,
                   'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Express' % workflowName, **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(sharedFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topLevelSub)
        for task in [x for x in secondLevelTasks if not x.count('CleanupUnmerged')]:
            secondLevelWorkflow = Workflow(task='/%s/Express/%s' % (workflowName, task), **options)
            secondLevelWorkflow.create()
            mergeSub = Subscription(sharedFileset, secondLevelWorkflow)
            mergeSub.create()
            self.stateMap['Harvesting'].append(mergeSub)
        for (parent, child) in [('ExpressAlcaSkimwrite_StreamZFast_ALCARECO', alcaHarvestTask),
                                ('ExpressMergewrite_StreamZFast_DQM', dqmHarvestTask)]:
            harvestingWorkflow = Workflow(task='/%s/Express/%s/%s' % (workflowName, parent, child),
                                          **options)
            harvestingWorkflow.create()
            harvestingSub = Subscription(sharedFileset, harvestingWorkflow)
            harvestingSub.create()
            self.stateMap['Processing Done'].append(harvestingSub)

        return

    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """
        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        # Fileset names below are derived from topLevelTask instead of being
        # hard-coded; the resulting strings are unchanged.
        alcarecoFileset = Fileset(name='%s/unmerged-write_ALCARECOALCARECO' % topLevelTask)
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                # strip the 'AlcaSkimMerge' prefix to recover the stream name
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='%s/RecoMergewrite_DQM/merged-MergedDQM' % topLevelTask)
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return

    def verifyStateTransitions(self, transitionMethod='markFinished', transitionTrigger=True):
        """
        _verifyStateTransitions_

        Utility method which goes through the list of states in self.orderedStates and
        finishes the tasks that demand a state transition in each step. This according
        to the defined transition method and trigger.
        It verifies that the request document in WMStats is moving according to the transitions
        """
        # Two passes per state: first finish all-but-one trigger objects (no
        # transition expected), then finish the last one (transition expected).
        for idx in range(0, len(self.orderedStates) * 2):
            # explicit floor division (identical result in Python 2,
            # correct under Python 3 as well)
            nextState = self.orderedStates[idx // 2]
            if (idx // 2) == 0:
                currentState = 'Closed'
            else:
                currentState = self.orderedStates[idx // 2 - 1]
            if idx % 2 == 0:
                for transitionObject in self.stateMap[nextState][:-1]:
                    method = getattr(transitionObject, transitionMethod)
                    method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 1,
                                 'Workflow moved incorrectly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 0,
                                 'Workflow moved incorrectly to %s' % nextState)
            else:
                transitionObject = self.stateMap[nextState][-1]
                method = getattr(transitionObject, transitionMethod)
                method(transitionTrigger)
                self.plugin([], self.requestDBWriter, self.requestDBWriter)
                currentStateWorkflows = self.requestDBWriter.getRequestByStatus([currentState])
                nextStateWorkflows = self.requestDBWriter.getRequestByStatus([nextState])
                self.assertEqual(len(currentStateWorkflows), 0,
                                 'Workflow did not move correctly from %s' % currentState)
                self.assertEqual(len(nextStateWorkflows), 1,
                                 'Workflow did not move correctly to %s' % nextState)
        return

    def testA_RepackStates(self):
        """
        _testA_RepackStates_

        Setup an environment with a Repack workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupRepackWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions; Repack states are driven by closing
        # filesets, hence markOpen(False)
        self.verifyStateTransitions('markOpen', False)

        return

    def testB_ExpressStates(self):
        """
        _testB_ExpressStates_

        Setup an environment with a Express workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupExpressWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return

    def testC_PromptRecoStates(self):
        """
        _testC_PromptRecoStates_

        Setup an environment with a PromptReco workflow
        and traverse through the different states.
        Check that the transitions are sane.
        """
        # Set the environment
        self.setupPromptRecoWorkflow()
        self.plugin = Tier0Plugin()

        # Verify the transitions
        self.verifyStateTransitions()

        return
class JobArchiverTest(unittest.TestCase):
    """
    TestCase for TestJobArchiver module
    """

    _maxMessage = 10

    def setUp(self):
        """
        setup for test.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        #self.tearDown()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        self.testInit.setupCouch("jobarchiver_t_0/jobs", "JobDump")
        self.testInit.setupCouch("jobarchiver_t_0/fwjrs", "FWJRDump")

        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daofactory(classname="Jobs.GetAllJobs")

        self.testDir = self.testInit.generateWorkDir(deleteOnDestruction=False)
        self.nJobs = 10

        self.alertsReceiver = None
        EmulatorHelper.setEmulators(phedex=True, dbs=True,
                                    siteDB=True, requestMgr=False)
        return

    def tearDown(self):
        """
        Database deletion
        """
        EmulatorHelper.resetEmulators()
        self.testInit.clearDatabase(modules=["WMCore.WMBS"])
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        if self.alertsReceiver:
            self.alertsReceiver.shutdown()
        return

    def getConfig(self):
        """
        _createConfig_

        General config file
        """
        config = Configuration()

        #First the general stuff
        config.section_("General")
        # NOTE(review): both casings are set; presumably different consumers
        # read different ones — confirm before removing either.
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())
        config.General.WorkDir = os.getenv("TESTDIR", os.getcwd())

        #Now the CoreDatabase information
        #This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv48.fnal.gov:5984")
        config.JobStateMachine.couchDBName = "jobarchiver_t_0"

        config.component_("JobArchiver")
        config.JobArchiver.pollInterval = 60
        config.JobArchiver.logLevel = 'INFO'
        #config.JobArchiver.logDir = os.path.join(self.testDir, 'logs')
        config.JobArchiver.componentDir = self.testDir
        config.JobArchiver.numberOfJobsToCluster = 1000

        config.component_('WorkQueueManager')
        config.WorkQueueManager.namespace = "WMComponent.WorkQueueManager.WorkQueueManager"
        config.WorkQueueManager.componentDir = config.General.workDir + "/WorkQueueManager"
        config.WorkQueueManager.level = 'LocalQueue'
        config.WorkQueueManager.logLevel = 'DEBUG'
        config.WorkQueueManager.couchurl = 'https://None'
        config.WorkQueueManager.dbname = 'whatever'
        config.WorkQueueManager.inboxDatabase = 'whatever2'
        config.WorkQueueManager.queueParams = {}
        config.WorkQueueManager.queueParams["ParentQueueCouchUrl"] = "https://cmsweb.cern.ch/couchdb/workqueue"

        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"

        return config

    def createTestJobGroup(self):
        """
        Creates a group of several jobs
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        # BUGFIX: this used to (re)set the location on testFileA, leaving
        # testFileB without any location; the intent was clearly testFileB.
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJobGroup.add(testJob)

        testJobGroup.commit()

        return testJobGroup

    def testA_BasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of closeouts
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        testJobGroup = self.createTestJobGroup()

        changer = ChangeState(config)

        cacheDir = os.path.join(self.testDir, 'test')
        if not os.path.isdir(cacheDir):
            os.mkdir(cacheDir)

        #if os.path.isdir(config.JobArchiver.logDir):
        #    shutil.rmtree(config.JobArchiver.logDir)

        # Give every job a cache directory with one output file in it
        for job in testJobGroup.jobs:
            myThread.transaction.begin()
            job["outcome"] = "success"
            job.save()
            myThread.transaction.commit()
            path = os.path.join(cacheDir, job['name'])
            os.makedirs(path)
            with open('%s/%s.out' % (path, job['name']), 'w') as f:
                f.write(job['name'])
            job.setCache(path)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'success', 'complete')

        testJobArchiver = JobArchiverPoller(config=config)
        testJobArchiver.algorithm()

        # All jobs should have been moved to 'cleanout'
        result = myThread.dbi.processData(
            "SELECT wmbs_job_state.name FROM wmbs_job_state INNER JOIN wmbs_job ON wmbs_job.state = wmbs_job_state.id"
        )[0].fetchall()
        for val in result:
            self.assertEqual(val.values(), ['cleanout'])

        # Cache directories must be gone ...
        dirList = os.listdir(cacheDir)
        for job in testJobGroup.jobs:
            self.assertEqual(job["name"] in dirList, False)

        # ... and their contents archived into per-job tarballs
        logPath = os.path.join(config.JobArchiver.componentDir, 'logDir',
                               'w', 'wf001', 'JobCluster_0')
        logList = os.listdir(logPath)
        for job in testJobGroup.jobs:
            self.assertEqual('Job_%i.tar.bz2' % (job['id']) in logList, True,
                             'Could not find transferred tarball for job %i' % (job['id']))
            pipe = Popen(['tar', '-jxvf',
                          os.path.join(logPath, 'Job_%i.tar.bz2' % (job['id']))],
                         stdout=PIPE, stderr=PIPE, shell=False)
            pipe.wait()
            #filename = '%s/%s/%s.out' %(cacheDir[1:], job['name'], job['name'])
            filename = 'Job_%i/%s.out' % (job['id'], job['name'])
            self.assertEqual(os.path.isfile(filename), True,
                             'Could not find file %s' % (filename))
            with open(filename, 'r') as f:
                fileContents = f.readlines()
            self.assertEqual(fileContents[0].find(job['name']) > -1, True)
            shutil.rmtree('Job_%i' % (job['id']))
            if os.path.isfile('Job_%i.tar.bz2' % (job['id'])):
                os.remove('Job_%i.tar.bz2' % (job['id']))

        return

    @attr('integration')
    def testB_SpeedTest(self):
        """
        _SpeedTest_

        Tests the components, as in sees if they load.
        Otherwise does nothing.
        """
        # NOTE: this early return deliberately disables the profiling run
        # below; remove it to benchmark the JobArchiverPoller.
        return
        myThread = threading.currentThread()

        config = self.getConfig()

        self.nJobs = 2000

        testJobGroup = self.createTestJobGroup()

        changer = ChangeState(config)
        cacheDir = os.path.join(self.testDir, 'test')

        for job in testJobGroup.jobs:
            job["outcome"] = "success"
            job.save()
            path = os.path.join(cacheDir, job['name'])
            os.makedirs(path)
            with open('%s/%s.out' % (path, job['name']), 'w') as f:
                f.write(job['name'])
            job.setCache(path)

        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'success', 'complete')

        testJobArchiver = JobArchiverPoller(config=config)
        cProfile.runctx("testJobArchiver.algorithm()", globals(), locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    def testJobArchiverPollerAlertsSending_constructor(self):
        """
        Cause exception (alert-worthy situation) in
        the JobArchiverPoller constructor.
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        config.JobArchiver.logDir = ""
        config.JobArchiver.componentDir = ""
        # invoke exception and thus Alert message
        self.assertRaises(JobArchiverPollerException, JobArchiverPoller, config=config)
        # wait for the generated alert to arrive
        while len(handler.queue) == 0:
            time.sleep(0.3)
            # single-argument print() works identically under Python 2
            print("%s waiting for alert to arrive ..." % inspect.stack()[0][3])

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), 1)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], "JobArchiverPoller")

    def testJobArchiverPollerAlertsSending_cleanJobCache(self):
        """
        Cause exception (alert-worthy situation) in
        the cleanJobCache method.
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        handler, self.alertsReceiver = \
            utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr)

        testJobArchiver = JobArchiverPoller(config=config)
        # invoke the problem and thus Alert message
        job = dict(cache_dir=None)
        testJobArchiver.cleanJobCache(job)
        # wait for the generated alert to arrive
        while len(handler.queue) == 0:
            time.sleep(0.3)
            print("%s waiting for alert to arrive ..." % inspect.stack()[0][3])

        self.alertsReceiver.shutdown()
        self.alertsReceiver = None
        # now check if the alert was properly sent
        self.assertEqual(len(handler.queue), 1)
        alert = handler.queue[0]
        self.assertEqual(alert["Source"], testJobArchiver.__class__.__name__)
class BossAirTest(unittest.TestCase): """ Tests for the BossAir prototype """ sites = ['T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN', 'T2_US_Florida'] def setUp(self): """ setup for test. """ myThread = threading.currentThread() self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.tearDown() self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"], useDefault = False) self.testInit.setupCouch("bossair_t/jobs", "JobDump") self.testInit.setupCouch("bossair_t/fwjrs", "FWJRDump") self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.getJobs = self.daoFactory(classname = "Jobs.GetAllJobs") #Create sites in resourceControl resourceControl = ResourceControl() for site in self.sites: resourceControl.insertSite(siteName = site, seName = 'se.%s' % (site), cmsName = site, ceName = site, plugin = "CondorPlugin", pendingSlots = 1000, runningSlots = 2000) resourceControl.insertThreshold(siteName = site, taskType = 'Processing', \ maxSlots = 1000, pendingSlots = 1000) resourceControl.insertSite(siteName = 'Xanadu', seName = 'se.Xanadu',cmsName = site, ceName = 'Xanadu', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'Xanadu', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) resourceControl.insertSite(siteName = 'jade-cms.hip.fi', seName = 'madhatter.csc.fi', cmsName = site, ceName = 'jade-cms.hip.fi', plugin = "ARCPlugin") resourceControl.insertThreshold(siteName = 'jade-cms.hip.fi', taskType = 'Processing', \ maxSlots = 100, pendingSlots = 100) # using this for glite submissions resourceControl.insertSite(siteName = 'grid-ce-01.ba.infn.it', seName = 'storm-se-01.ba.infn.it', cmsName = site, ceName = 'grid-ce-01.ba.infn.it', plugin = 'gLitePlugin') resourceControl.insertThreshold(siteName = 'grid-ce-01.ba.infn.it', taskType = 'Processing', \ maxSlots = 50, pendingSlots = 
50) # Create user newuser = self.daoFactory(classname = "Users.New") newuser.execute(dn = "tapas", group_name = "phgroup", role_name = "cmsrole") # We actually need the user name self.user = getpass.getuser() # Change this to the working dir to keep track of error and log files from condor self.testInit.generateWorkDir() # Set heartbeat componentName = 'test' self.heartbeatAPI = HeartbeatAPI(componentName) self.heartbeatAPI.registerComponent() componentName = 'JobTracker' self.heartbeatAPI2 = HeartbeatAPI(componentName) self.heartbeatAPI2.registerComponent() return def tearDown(self): """ Database deletion """ #self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl", "WMCore.Agent.Database"]) self.testInit.delWorkDir() self.testInit.tearDownCouch() return def getConfig(self): """ _getConfig_ Build a basic BossAir config """ config = self.testInit.getConfiguration() config.section_("Agent") config.Agent.agentName = 'testAgent' config.Agent.componentName = 'test' config.Agent.useHeartbeat = False config.section_("CoreDatabase") config.CoreDatabase.connectUrl = os.getenv("DATABASE") config.CoreDatabase.socket = os.getenv("DBSOCK") config.section_("BossAir") config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin'] config.BossAir.pluginDir = 'WMCore.BossAir.Plugins' config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh' config.component_("JobSubmitter") config.JobSubmitter.logLevel = 'INFO' config.JobSubmitter.pollInterval = 1 config.JobSubmitter.pluginName = 'AirPlugin' config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins' config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit') config.JobSubmitter.submitNode = os.getenv("HOSTNAME", 'stevia.hep.wisc.edu') config.JobSubmitter.submitScript = os.path.join(WMCore.WMInit.getWMBASE(), 'test/python/WMComponent_t/JobSubmitter_t', 'submit.sh') config.JobSubmitter.componentDir = os.path.join(os.getcwd(), 'Components') config.JobSubmitter.workerThreads 
= 2 config.JobSubmitter.jobsPerWorker = 200 config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), 'config.cfg') # JobTracker config.component_("JobTracker") config.JobTracker.logLevel = 'INFO' config.JobTracker.pollInterval = 1 # JobStateMachine config.component_('JobStateMachine') config.JobStateMachine.couchurl = os.getenv('COUCHURL') config.JobStateMachine.couchDBName = "bossair_t" # JobStatusLite config.component_('JobStatusLite') config.JobStatusLite.componentDir = os.path.join(os.getcwd(), 'Components') config.JobStatusLite.stateTimeouts = {'Pending': 10, 'Running': 86400} config.JobStatusLite.pollInterval = 1 return config def createTestWorkload(self, workloadName = 'Test', emulator = True): """ _createTestWorkload_ Creates a test workload for us to run on, hold the basic necessities. """ workload = testWorkload("Tier1ReReco") rereco = workload.getTask("ReReco") taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest')) taskMaker.skipSubscription = True taskMaker.processWorkload() workload.save(workloadName) return workload def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site = None, bl = [], wl = []): """ Creates a series of jobGroups for submissions """ jobGroupList = [] testWorkflow = Workflow(spec = workloadSpec, owner = "tapas", name = makeUUID(), task="basicWorkload/Production", owner_vogroup = 'phgroup', owner_vorole = 'cmsrole') testWorkflow.create() # Create subscriptions for i in range(nSubs): name = makeUUID() # Create Fileset, Subscription, jobGroup testFileset = Fileset(name = name) testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() # Create jobs self.makeNJobs(name = name, task = task, nJobs = nJobs, jobGroup = testJobGroup, fileset = testFileset, sub = testSubscription.exists(), site = site, bl = bl, wl = wl) 
testFileset.commit() testJobGroup.commit() jobGroupList.append(testJobGroup) return jobGroupList def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site = None, bl = [], wl = []): """ _makeNJobs_ Make and return a WMBS Job and File This handles all those damn add-ons """ # Set the CacheDir cacheDir = os.path.join(self.testDir, 'CacheDir') for n in range(nJobs): # First make a file #site = self.sites[0] testFile = File(lfn = "/singleLfn/%s/%s" %(name, n), size = 1024, events = 10) if site: testFile.setLocation(site) else: for tmpSite in self.sites: testFile.setLocation('se.%s' % (tmpSite)) testFile.create() fileset.addFile(testFile) fileset.commit() index = 0 for f in fileset.files: index += 1 testJob = Job(name = '%s-%i' %(name, index)) testJob.addFile(f) testJob["location"] = f.getLocations()[0] testJob['custom']['location'] = f.getLocations()[0] testJob['task'] = task.getPathName() testJob['sandbox'] = task.data.input.sandbox testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl') testJob['mask']['FirstEvent'] = 101 testJob['owner'] = 'tapas' testJob["siteBlacklist"] = bl testJob["siteWhitelist"] = wl testJob['ownerDN'] = 'tapas' testJob['ownerRole'] = 'cmsrole' testJob['ownerGroup'] = 'phgroup' jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index)) os.makedirs(jobCache) testJob.create(jobGroup) testJob['cache_dir'] = jobCache testJob.save() jobGroup.add(testJob) output = open(os.path.join(jobCache, 'job.pkl'),'w') pickle.dump(testJob, output) output.close() return testJob, testFile def createDummyJobs(self, nJobs, location = None): """ _createDummyJobs_ Create some dummy jobs """ if not location: location = self.sites[0] nameStr = makeUUID() testWorkflow = Workflow(spec = nameStr, owner = "tapas", name = nameStr, task="basicWorkload/Production", owner_vogroup = 'phgroup', owner_vorole = 'cmsrole') testWorkflow.create() testFileset = Fileset(name = nameStr) testFileset.create() testSubscription = Subscription(fileset = 
testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() jobList = [] for i in range(nJobs): testJob = Job(name = '%s-%i' % (nameStr, i)) testJob['location'] = location testJob['custom']['location'] = location testJob['userdn'] = 'tapas' testJob['owner'] = 'tapas' testJob['userrole'] = 'cmsrole' testJob['usergroup'] = 'phgroup' testJob.create(testJobGroup) jobList.append(testJob) return jobList @attr('integration') def testA_APITest(self): """ _APITest_ This is a commissioning test that has very little to do with anything except loading the code. """ #return myThread = threading.currentThread() config = self.getConfig() baAPI = BossAirAPI(config = config) # We should have loaded a plugin self.assertTrue('TestPlugin' in baAPI.plugins.keys()) result = myThread.dbi.processData("SELECT name FROM bl_status")[0].fetchall() statusList = [] for i in result: statusList.append(i.values()[0]) # We should have the plugin states in the database self.assertEqual(statusList.sort(), ['New', 'Dead', 'Gone'].sort()) # Create some jobs nJobs = 10 jobDummies = self.createDummyJobs(nJobs = nJobs) print jobDummies baAPI.createNewJobs(wmbsJobs = jobDummies) runningJobs = baAPI._listRunJobs() self.assertEqual(len(runningJobs), nJobs) newJobs = baAPI._loadByStatus(status = 'New') self.assertEqual(len(newJobs), nJobs) deadJobs = baAPI._loadByStatus(status = 'Dead') self.assertEqual(len(deadJobs), 0) raisesException = False self.assertRaises(BossAirException, baAPI._loadByStatus, status = 'FalseStatus') # Change the job status and update it for job in newJobs: job['status'] = 'Dead' baAPI._updateJobs(jobs = newJobs) # Test whether we see the job status as updated newJobs = baAPI._loadByStatus(status = 'New') self.assertEqual(len(newJobs), 0) deadJobs = baAPI._loadByStatus(status = 'Dead') self.assertEqual(len(deadJobs), nJobs) # Can we load by BossAir ID? 
loadedJobs = baAPI._loadByID(jobs = deadJobs) self.assertEqual(len(loadedJobs), nJobs) # Can we load via WMBS? loadedJobs = baAPI.loadByWMBS(wmbsJobs = jobDummies) self.assertEqual(len(loadedJobs), nJobs) # See if we can delete jobs baAPI._deleteJobs(jobs = deadJobs) # Confirm that they're gone deadJobs = baAPI._loadByStatus(status = 'Dead') self.assertEqual(len(deadJobs), 0) self.assertEqual(len(baAPI.jobs), 0) return @attr('integration') def testB_PluginTest(self): """ _PluginTest_ Now check that these functions worked if called through plugins Instead of directly. There are only three plugin """ #return myThread = threading.currentThread() config = self.getConfig() baAPI = BossAirAPI(config = config) # Create some jobs nJobs = 10 jobDummies = self.createDummyJobs(nJobs = nJobs, location = 'Xanadu') changeState = ChangeState(config) changeState.propagate(jobDummies, 'created', 'new') changeState.propagate(jobDummies, 'executing', 'created') # Prior to building the job, each job must have a plugin # and user assigned for job in jobDummies: job['plugin'] = 'TestPlugin' job['owner'] = 'tapas' baAPI.submit(jobs = jobDummies) newJobs = baAPI._loadByStatus(status = 'New') self.assertEqual(len(newJobs), nJobs) # Should be no more running jobs runningJobs = baAPI._listRunJobs() self.assertEqual(len(runningJobs), nJobs) # Test Plugin should complete all jobs baAPI.track() # Should be no more running jobs runningJobs = baAPI._listRunJobs() self.assertEqual(len(runningJobs), 0) # Check if they're complete completeJobs = baAPI.getComplete() self.assertEqual(len(completeJobs), nJobs) # Do this test because BossAir is specifically built # to keep it from finding completed jobs result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall() self.assertEqual(len(result), nJobs) baAPI.removeComplete(jobs = jobDummies) result = myThread.dbi.processData("SELECT id FROM bl_runjob")[0].fetchall() self.assertEqual(len(result), 0) return def testG_monitoringDAO(self): """ 
_monitoringDAO_ Because I need a test for the monitoring DAO """ return myThread = threading.currentThread() config = self.getConfig() changeState = ChangeState(config) baAPI = BossAirAPI(config = config) # Create some jobs nJobs = 10 jobDummies = self.createDummyJobs(nJobs = nJobs) # Prior to building the job, each job must have a plugin # and user assigned for job in jobDummies: job['plugin'] = 'TestPlugin' job['owner'] = 'tapas' job['location'] = 'T2_US_UCSD' job.save() baAPI.submit(jobs = jobDummies) results = baAPI.monitor() self.assertEqual(len(results), nJobs) for job in results: self.assertEqual(job['plugin'], 'CondorPlugin') return
class TaskArchiverTest(unittest.TestCase): """ TestCase for TestTaskArchiver module """ _setup_done = False _teardown = False _maxMessage = 10 OWNERDN = os.environ[ 'OWNERDN'] if 'OWNERDN' in os.environ else "Generic/OWNERDN" def setUp(self): """ setup for test. """ myThread = threading.currentThread() self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMCore.WMBS", "WMComponent.DBS3Buffer"], useDefault=False) self.databaseName = "taskarchiver_t_0" self.testInit.setupCouch("%s/workloadsummary" % self.databaseName, "WorkloadSummary") self.testInit.setupCouch("%s/jobs" % self.databaseName, "JobDump") self.testInit.setupCouch("%s/fwjrs" % self.databaseName, "FWJRDump") self.testInit.setupCouch("wmagent_summary_t", "WMStats") self.testInit.setupCouch("wmagent_summary_central_t", "WMStats") self.daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.getJobs = self.daofactory(classname="Jobs.GetAllJobs") self.inject = self.daofactory( classname="Workflow.MarkInjectedWorkflows") self.testDir = self.testInit.generateWorkDir() os.makedirs(os.path.join(self.testDir, 'specDir')) self.nJobs = 10 self.campaignName = 'aCampaign' self.alertsReceiver = None self.uploadPublishInfo = False self.uploadPublishDir = None return def tearDown(self): """ Database deletion """ myThread = threading.currentThread() self.testInit.clearDatabase(modules=["WMCore.WMBS"]) self.testInit.delWorkDir() self.testInit.tearDownCouch() if self.alertsReceiver: self.alertsReceiver.shutdown() self.alertsReceiver = None return def getConfig(self): """ _createConfig_ General config file """ config = self.testInit.getConfiguration() #self.testInit.generateWorkDir(config) config.section_("General") config.General.workDir = "." 
config.section_("JobStateMachine") config.JobStateMachine.couchurl = os.getenv("COUCHURL", "cmssrv52.fnal.gov:5984") config.JobStateMachine.couchDBName = self.databaseName config.JobStateMachine.jobSummaryDBName = 'wmagent_summary_t' config.component_("JobCreator") config.JobCreator.jobCacheDir = os.path.join(self.testDir, 'testDir') config.component_("TaskArchiver") config.TaskArchiver.componentDir = self.testDir config.TaskArchiver.WorkQueueParams = {} config.TaskArchiver.pollInterval = 60 config.TaskArchiver.logLevel = 'INFO' config.TaskArchiver.timeOut = 0 config.TaskArchiver.histogramKeys = [ 'AvgEventTime', 'writeTotalMB', 'jobTime' ] config.TaskArchiver.histogramBins = 5 config.TaskArchiver.histogramLimit = 5 config.TaskArchiver.workloadSummaryCouchDBName = "%s/workloadsummary" % self.databaseName config.TaskArchiver.workloadSummaryCouchURL = config.JobStateMachine.couchurl config.TaskArchiver.centralWMStatsURL = '%s/wmagent_summary_central_t' % config.JobStateMachine.couchurl config.TaskArchiver.requireCouch = True config.TaskArchiver.uploadPublishInfo = self.uploadPublishInfo config.TaskArchiver.uploadPublishDir = self.uploadPublishDir config.TaskArchiver.userFileCacheURL = os.getenv( 'UFCURL', 'http://cms-xen38.fnal.gov:7725/userfilecache/') config.section_("ACDC") config.ACDC.couchurl = config.JobStateMachine.couchurl config.ACDC.database = config.JobStateMachine.couchDBName # Make the jobCacheDir os.mkdir(config.JobCreator.jobCacheDir) # addition for Alerts messaging framework, work (alerts) and control # channel addresses to which the component will be sending alerts # these are destination addresses where AlertProcessor:Receiver listens config.section_("Alert") config.Alert.address = "tcp://127.0.0.1:5557" config.Alert.controlAddr = "tcp://127.0.0.1:5559" config.section_("BossAir") config.BossAir.UISetupScript = '/afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh' config.BossAir.gliteConf = '/afs/cern.ch/cms/LCG/LCG-2/UI/conf/glite_wms_CERN.conf' 
config.BossAir.credentialDir = '/home/crab/ALL_SETUP/credentials/' config.BossAir.gLiteProcesses = 2 config.BossAir.gLitePrefixEnv = "/lib64/" config.BossAir.pluginNames = ["gLitePlugin"] config.BossAir.proxyDir = "/tmp/credentials" config.BossAir.manualProxyPath = os.environ[ 'X509_USER_PROXY'] if 'X509_USER_PROXY' in os.environ else None config.section_("Agent") config.Agent.serverDN = "/we/bypass/myproxy/logon" return config def createWorkload(self, workloadName='Test', emulator=True): """ _createTestWorkload_ Creates a test workload for us to run on, hold the basic necessities. """ workload = testWorkload("Tier1ReReco") taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest')) taskMaker.skipSubscription = True taskMaker.processWorkload() workload.setCampaign(self.campaignName) workload.save(workloadName) return workload def createTestJobGroup(self, config, name="TestWorkthrough", filesetName="TestFileset", specLocation="spec.xml", error=False, task="/TestWorkload/ReReco", multicore=False): """ Creates a group of several jobs """ myThread = threading.currentThread() testWorkflow = Workflow(spec=specLocation, owner=self.OWNERDN, name=name, task=task, owner_vogroup="", owner_vorole="") testWorkflow.create() self.inject.execute(names=[name], injected=True) testWMBSFileset = Fileset(name=filesetName) testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12314])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testWMBSFileset.markOpen(0) outputWMBSFileset = Fileset(name='%sOutput' % filesetName) outputWMBSFileset.create() testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10) testFileC.addRun(Run(10, *[12312])) 
testFileC.setLocation('malpaquet') testFileC.create() outputWMBSFileset.addFile(testFileC) outputWMBSFileset.commit() outputWMBSFileset.markOpen(0) testWorkflow.addOutput('output', outputWMBSFileset) testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for i in range(0, self.nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313]) testJobGroup.add(testJob) testJobGroup.commit() changer = ChangeState(config) report1 = Report() report2 = Report() if error: path1 = os.path.join(WMCore.WMBase.getTestBase(), "WMComponent_t/JobAccountant_t/fwjrs", "badBackfillJobReport.pkl") path2 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl') elif multicore: path1 = os.path.join( WMCore.WMBase.getTestBase(), "WMCore_t/FwkJobReport_t/MulticoreReport.pkl") path2 = path1 else: path1 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'mergeReport1.pkl') path2 = os.path.join(WMCore.WMBase.getTestBase(), 'WMComponent_t/TaskArchiver_t/fwjrs', 'logCollectReport2.pkl') report1.load(filename=path1) report2.load(filename=path2) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 'executing') for i in range(self.nJobs): if i < self.nJobs / 2: testJobGroup.jobs[i]['fwjr'] = report1 else: testJobGroup.jobs[i]['fwjr'] = report2 changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed') changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff') changer.propagate(testJobGroup.jobs, 'executing', 'created') changer.propagate(testJobGroup.jobs, 'complete', 
'executing') changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete') changer.propagate(testJobGroup.jobs, 'exhausted', 'jobfailed') changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted') testSubscription.completeFiles([testFileA, testFileB]) return testJobGroup def createGiantJobSet(self, name, config, nSubs=10, nJobs=10, nFiles=1, spec="spec.xml"): """ Creates a massive set of jobs """ jobList = [] for i in range(0, nSubs): # Make a bunch of subscriptions localName = '%s-%i' % (name, i) testWorkflow = Workflow(spec=spec, owner=self.OWNERDN, name=localName, task="Test", owner_vogroup="", owner_vorole="") testWorkflow.create() testWMBSFileset = Fileset(name=localName) testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() filesToComplete = [] for j in range(0, nJobs): # Create jobs for each subscription testFileA = File(lfn="%s-%i-lfnA" % (localName, j), size=1024, events=10) testFileA.addRun( Run( 10, *[ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 ])) testFileA.setLocation('malpaquet') testFileA.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.commit() filesToComplete.append(testFileA) testJob = Job(name='%s-%i' % (localName, j)) testJob.addFile(testFileA) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) jobList.append(testJob) for k in range(0, nFiles): # Create output files testFile = File(lfn="%s-%i-output" % (localName, k), size=1024, events=10) testFile.addRun(Run(10, *[12312])) testFile.setLocation('malpaquet') testFile.create() testJobGroup.output.addFile(testFile) testJobGroup.output.commit() testJobGroup.commit() changer = ChangeState(config) changer.propagate(testJobGroup.jobs, 'created', 'new') changer.propagate(testJobGroup.jobs, 'executing', 'created') 
changer.propagate(testJobGroup.jobs, 'complete', 'executing') changer.propagate(testJobGroup.jobs, 'success', 'complete') changer.propagate(testJobGroup.jobs, 'cleanout', 'success') testWMBSFileset.markOpen(0) testSubscription.completeFiles(filesToComplete) return jobList def testA_BasicFunctionTest(self): """ _BasicFunctionTest_ Tests the components, by seeing if they can process a simple set of closeouts """ myThread = threading.currentThread() config = self.getConfig() workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName=workloadPath) testJobGroup = self.createTestJobGroup(config=config, name=workload.name(), specLocation=workloadPath, error=False) # Create second workload testJobGroup2 = self.createTestJobGroup( config=config, name=workload.name(), filesetName="TestFileset_2", specLocation=workloadPath, task="/TestWorkload/ReReco/LogCollect") cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco") os.makedirs(cachePath) self.assertTrue(os.path.exists(cachePath)) cachePath2 = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "LogCollect") os.makedirs(cachePath2) self.assertTrue(os.path.exists(cachePath2)) result = myThread.dbi.processData( "SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 2) workflowName = "TestWorkload" dbname = config.TaskArchiver.workloadSummaryCouchDBName couchdb = CouchServer(config.JobStateMachine.couchurl) workdatabase = couchdb.connectDatabase(dbname) jobdb = couchdb.connectDatabase("%s/jobs" % self.databaseName) fwjrdb = couchdb.connectDatabase("%s/fwjrs" % self.databaseName) jobs = jobdb.loadView("JobDump", "jobsByWorkflowName", options={ "startkey": [workflowName], "endkey": [workflowName, {}] })['rows'] self.assertEqual(len(jobs), 2 * self.nJobs) from WMCore.WMBS.CreateWMBSBase import CreateWMBSBase create = CreateWMBSBase() tables = [] for x in create.requiredTables: tables.append(x[2:]) testTaskArchiver = 
TaskArchiverPoller(config=config) testTaskArchiver.algorithm() result = myThread.dbi.processData( "SELECT * FROM wmbs_job")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_jobgroup")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_fileset")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_file_details")[0].fetchall() self.assertEqual(len(result), 0) # Make sure we deleted the directory self.assertFalse(os.path.exists(cachePath)) self.assertFalse( os.path.exists( os.path.join(self.testDir, 'workloadTest/TestWorkload'))) testWMBSFileset = Fileset(id=1) self.assertEqual(testWMBSFileset.exists(), False) workloadSummary = workdatabase.document(id="TestWorkload") # Check ACDC self.assertEqual(workloadSummary['ACDCServer'], sanitizeURL(config.ACDC.couchurl)['url']) # Check the output self.assertEqual(workloadSummary['output'].keys(), ['/Electron/MorePenguins-v0/RECO']) self.assertEqual( sorted(workloadSummary['output']['/Electron/MorePenguins-v0/RECO'] ['tasks']), ['/TestWorkload/ReReco', '/TestWorkload/ReReco/LogCollect']) # Check performance # Check histograms self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['AvgEventTime']['histogram'][0]['average'], 0.89405199999999996, places=2) self.assertEqual( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['AvgEventTime']['histogram'][0]['nEvents'], 10) # Check standard performance self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['TotalJobCPU']['average'], 17.786300000000001, places=2) self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['TotalJobCPU']['stdDev'], 0.0, places=2) # Check worstOffenders 
self.assertEqual( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['AvgEventTime']['worstOffenders'], [{ 'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1 }, { 'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 1 }, { 'logCollect': None, 'log': None, 'value': '0.894052', 'jobID': 2 }]) # Check retryData self.assertEqual(workloadSummary['retryData']['/TestWorkload/ReReco'], {'1': 10}) logCollectPFN = 'srm://srm-cms.cern.ch:8443/srm/managerv2?SFN=/castor/cern.ch/cms/store/logs/prod/2012/11/WMAgent/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8/Run206446-MinimumBias-Run2012D-v1-Tier1PromptReco-4af7e658-23a4-11e2-96c7-842b2b4671d8-AlcaSkimLogCollect-1-logs.tar' self.assertEqual(workloadSummary['logArchives'], { '/TestWorkload/ReReco/LogCollect': [logCollectPFN for _ in range(10)] }) # LogCollect task is made out of identical FWJRs # assert that it is identical for x in workloadSummary['performance'][ '/TestWorkload/ReReco/LogCollect']['cmsRun1'].keys(): if x in config.TaskArchiver.histogramKeys: continue for y in ['average', 'stdDev']: self.assertAlmostEquals( workloadSummary['performance'] ['/TestWorkload/ReReco/LogCollect']['cmsRun1'][x][y], workloadSummary['performance']['/TestWorkload/ReReco'] ['cmsRun1'][x][y], places=2) return def testB_testErrors(self): """ _testErrors_ Test with a failed FWJR """ myThread = threading.currentThread() config = self.getConfig() workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName=workloadPath) testJobGroup = self.createTestJobGroup(config=config, name=workload.name(), specLocation=workloadPath, error=True) cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco") os.makedirs(cachePath) self.assertTrue(os.path.exists(cachePath)) testTaskArchiver = TaskArchiverPoller(config=config) testTaskArchiver.algorithm() dbname = getattr(config.JobStateMachine, "couchDBName") 
couchdb = CouchServer(config.JobStateMachine.couchurl) workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname) workloadSummary = workdatabase.document(id=workload.name()) self.assertEqual( workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500) self.assertTrue(workloadSummary['errors']['/TestWorkload/ReReco'] ['cmsRun1'].has_key('99999')) self.assertEquals( workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'] ['99999']['runs'], {'10': [12312]}, "Wrong lumi information in the summary for failed jobs") # Check the failures by site histograms self.assertEqual( workloadSummary['histograms']['workflowLevel']['failuresBySite'] ['data']['T1_IT_CNAF']['Failed Jobs'], 10) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10) self.assertEqual( workloadSummary['histograms']['workflowLevel']['failuresBySite'] ['average']['Failed Jobs'], 10) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['average']['99999'], 10) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['average']['8020'], 10) self.assertEqual( workloadSummary['histograms']['workflowLevel']['failuresBySite'] ['stdDev']['Failed Jobs'], 0) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['stdDev']['99999'], 0) self.assertEqual( workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco'] ['cmsRun1']['errorsBySite']['stdDev']['8020'], 0) return def atestC_Profile(self): """ _Profile_ DON'T RUN THIS! 
""" return import cProfile, pstats myThread = threading.currentThread() name = makeUUID() config = self.getConfig() jobList = self.createGiantJobSet(name=name, config=config, nSubs=10, nJobs=1000, nFiles=10) testTaskArchiver = TaskArchiverPoller(config=config) cProfile.runctx("testTaskArchiver.algorithm()", globals(), locals(), filename="testStats.stat") p = pstats.Stats('testStats.stat') p.sort_stats('cumulative') p.print_stats() return def atestD_Timing(self): """ _Timing_ This is to see how fast things go. """ return myThread = threading.currentThread() name = makeUUID() config = self.getConfig() jobList = self.createGiantJobSet(name=name, config=config, nSubs=10, nJobs=1000, nFiles=10) testTaskArchiver = TaskArchiverPoller(config=config) startTime = time.time() testTaskArchiver.algorithm() stopTime = time.time() result = myThread.dbi.processData( "SELECT * FROM wmbs_job")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_jobgroup")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_file_details")[0].fetchall() self.assertEqual(len(result), 0) testWMBSFileset = Fileset(id=1) self.assertEqual(testWMBSFileset.exists(), False) logging.info("TaskArchiver took %f seconds" % (stopTime - startTime)) def atestTaskArchiverPollerAlertsSending_notifyWorkQueue(self): """ Cause exception (alert-worthy situation) in the TaskArchiverPoller notifyWorkQueue method. 
""" return myThread = threading.currentThread() config = self.getConfig() testTaskArchiver = TaskArchiverPoller(config=config) # shall later be called directly from utils module handler, self.alertsReceiver = \ utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr) # prepare input such input which will go until where it expectantly # fails and shall send an alert # this will currently fail in the TaskArchiverPoller killSubscriptions # on trying to access .load() method which items of below don't have. # should anything change in the TaskArchiverPoller without modifying this # test accordingly, it may be failing ... print "failures 'AttributeError: 'dict' object has no attribute 'load' expected ..." subList = [{'id': 1}, {'id': 2}, {'id': 3}] testTaskArchiver.notifyWorkQueue(subList) # wait for the generated alert to arrive while len(handler.queue) < len(subList): time.sleep(0.3) print "%s waiting for alert to arrive ..." % inspect.stack()[0][3] self.alertsReceiver.shutdown() self.alertsReceiver = None # now check if the alert was properly sent (expect this many failures) self.assertEqual(len(handler.queue), len(subList)) alert = handler.queue[0] self.assertEqual(alert["Source"], "TaskArchiverPoller") def atestTaskArchiverPollerAlertsSending_killSubscriptions(self): """ Cause exception (alert-worthy situation) in the TaskArchiverPoller killSubscriptions method. (only 1 situation out of two tested). 
""" return myThread = threading.currentThread() config = self.getConfig() testTaskArchiver = TaskArchiverPoller(config=config) # shall later be called directly from utils module handler, self.alertsReceiver = \ utils.setUpReceiver(config.Alert.address, config.Alert.controlAddr) # will fail on calling .load() - regardless, the same except block numAlerts = 3 doneList = [{'id': x} for x in range(numAlerts)] # final re-raise is currently commented, so don't expect Exception here testTaskArchiver.killSubscriptions(doneList) # wait for the generated alert to arrive while len(handler.queue) < numAlerts: time.sleep(0.3) print "%s waiting for alert to arrive ..." % inspect.stack()[0][3] self.alertsReceiver.shutdown() self.alertsReceiver = None # now check if the alert was properly sent self.assertEqual(len(handler.queue), numAlerts) alert = handler.queue[0] self.assertEqual(alert["Source"], "TaskArchiverPoller") return def testE_multicore(self): """ _multicore_ Create a workload summary based on the multicore job report """ myThread = threading.currentThread() config = self.getConfig() workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl') workload = self.createWorkload(workloadName=workloadPath) testJobGroup = self.createTestJobGroup(config=config, name=workload.name(), specLocation=workloadPath, error=False, multicore=True) cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco") os.makedirs(cachePath) self.assertTrue(os.path.exists(cachePath)) dbname = config.TaskArchiver.workloadSummaryCouchDBName couchdb = CouchServer(config.JobStateMachine.couchurl) workdatabase = couchdb.connectDatabase(dbname) testTaskArchiver = TaskArchiverPoller(config=config) testTaskArchiver.algorithm() result = myThread.dbi.processData( "SELECT * FROM wmbs_job")[0].fetchall() self.assertEqual(len(result), 0, "No job should have survived") result = myThread.dbi.processData( "SELECT * FROM wmbs_subscription")[0].fetchall() self.assertEqual(len(result), 0) 
result = myThread.dbi.processData( "SELECT * FROM wmbs_jobgroup")[0].fetchall() self.assertEqual(len(result), 0) result = myThread.dbi.processData( "SELECT * FROM wmbs_file_details")[0].fetchall() self.assertEqual(len(result), 0) workloadSummary = workdatabase.document(id="TestWorkload") self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['minMergeTime']['average'], 5.7624950408900002, places=2) self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['numberOfMerges']['average'], 3.0, places=2) self.assertAlmostEquals( workloadSummary['performance']['/TestWorkload/ReReco']['cmsRun1'] ['averageProcessTime']['average'], 29.369966666700002, places=2) return # Requires a running UserFileCache to succeed. https://cmsweb.cern.ch worked for me # The environment variable OWNERDN needs to be set. Used to retrieve an already delegated proxy and contact the ufc @attr('integration') def testPublishJSONCreate(self): """ Re-run testA_BasicFunctionTest with data in DBSBuffer Make sure files are generated """ # Set up uploading and write them elsewhere since the test deletes them. 
self.uploadPublishInfo = True self.uploadPublishDir = self.testDir # Insert some DBSFiles testFileChildA = DBSBufferFile(lfn="/this/is/a/child/lfnA", size=1024, events=20) testFileChildA.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO", psetHash="GIBBERISH", configContent="MOREGIBBERISH") testFileChildB = DBSBufferFile(lfn="/this/is/a/child/lfnB", size=1024, events=20) testFileChildB.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO", psetHash="GIBBERISH", configContent="MOREGIBBERISH") testFileChildC = DBSBufferFile(lfn="/this/is/a/child/lfnC", size=1024, events=20) testFileChildC.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO", psetHash="GIBBERISH", configContent="MOREGIBBERISH") testFileChildA.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER") testFileChildB.setDatasetPath("/Cosmics/USER-DATASET1-v1/USER") testFileChildC.setDatasetPath("/Cosmics/USER-DATASET2-v1/USER") testFileChildA.create() testFileChildB.create() testFileChildC.create() testFile = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10) testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8", appFam="RECO", psetHash="GIBBERISH", configContent="MOREGIBBERISH") testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO") testFile.create() testFileChildA.addParents([testFile["lfn"]]) testFileChildB.addParents([testFile["lfn"]]) testFileChildC.addParents([testFile["lfn"]]) myThread = threading.currentThread() self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) self.insertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow") workflowID = self.insertWorkflow.execute( requestName='TestWorkload', taskPath='TestWorkload/Analysis') myThread.dbi.processData( "update dbsbuffer_file set workflow=1 where id < 4") # Run the test again self.testA_BasicFunctionTest() # Reset default values self.uploadPublishInfo = False self.uploadPublishDir = None # Make sure the files are there 
self.assertTrue( os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.json'))) self.assertTrue( os.path.getsize( os.path.join(self.testDir, 'TestWorkload_publish.json')) > 100) self.assertTrue( os.path.exists( os.path.join(self.testDir, 'TestWorkload_publish.tgz'))) return
class PromptRecoTest(unittest.TestCase):
    """
    _PromptRecoTest_

    Unit tests for the PromptReco workload factory: build the workload,
    inject it into WMBS via WMBSHelper, then verify the resulting
    workflows, output filesets and subscriptions (merge, cleanup,
    harvesting and log-collect) are created with the expected names,
    types and splitting algorithms.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("promptreco_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("promptreco_t")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def setupPromptSkimConfigObject(self):
        """
        _setupPromptSkimConfigObject_

        Creates a custom config object for testing of the skim functionality
        """
        self.promptSkim = ConfigSection(name="Tier1Skim")
        self.promptSkim.SkimName = "TestSkim1"
        self.promptSkim.DataTier = "RECO"
        self.promptSkim.TwoFileRead = False
        self.promptSkim.ProcessingVersion = "PromptSkim-v1"
        self.promptSkim.ConfigURL = "http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/CMSSW/Configuration/DataOps/python/prescaleskimmer.py?revision=1.1"

    def testPromptReco(self):
        """
        _testPromptReco_

        Create a Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = getTestArguments()

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # Top-level Reco workflow: one output fileset per write tier plus logArchive.
        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            # write_ALCARECO feeds the AlcaSkim task instead of a plain merge,
            # so it has no RecoMerge* merged fileset to check.
            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # AlcaSkim workflow: one output per configured AlcaSkim plus logArchive.
        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()

        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # DQM harvesting workflow hangs off the merged write_DQM output.
        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Merge workflows for the plain write tiers: Merged + logArchive outputs.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Merge workflows for each AlcaSkim stream.
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Subscriptions: top-level processing, AlcaSkim and DQM harvesting.
        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset = mergedDQMFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        # Merge subscriptions over the unmerged write tiers.
        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Merge subscriptions over the unmerged AlcaSkim streams.
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions over every unmerged output.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            # NOTE(fix): message previously said "Wrong subscription type"
            # while the assertion checks the splitting algorithm.
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        # Log-collect subscriptions for the Reco and AlcaSkim tasks.
        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                              task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # Log-collect subscriptions for the merge tasks.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            # NOTE(fix): these assertions previously checked the stale
            # 'logCollectSub' from the section above instead of the
            # subscription loaded in this loop.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                  task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            # NOTE(fix): assert on the freshly loaded subscription, not the
            # stale 'logCollectSub'.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        # Log-collect subscription for the DQM harvesting task.
        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return

    @attr("integration")
    def testPromptRecoWithSkims(self):
        """
        _testT1PromptRecoWithSkim_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        self.setupPromptSkimConfigObject()
        testArguments = getTestArguments()
        testArguments["PromptSkims"] = [self.promptSkim]
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnvPath"] = os.environ.get("EnvPath", None)
        testArguments["BinPath"] = os.environ.get("BinPath", None)

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # Top-level Reco workflow.
        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            # write_ALCARECO feeds AlcaSkim, so it has no RecoMerge* fileset.
            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # AlcaSkim workflow.
        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()

        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # PromptSkim workflow runs over the merged RECO output: five fake
        # skim outputs plus logArchive.
        promptSkimWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
        promptSkimWorkflow.load()

        self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Merge workflows for the plain write tiers.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Merge workflows for each AlcaSkim stream.
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Merge workflows for each PromptSkim output.
        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Subscriptions: processing, AlcaSkim and PromptSkim.
        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedRecoFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
        mergedRecoFileset.loadData()

        promptSkimSubscription = Subscription(fileset = mergedRecoFileset, workflow = promptSkimWorkflow)
        promptSkimSubscription.loadData()

        self.assertEqual(promptSkimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(promptSkimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"])

        # Merge subscriptions over the unmerged write tiers.
        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Merge subscriptions over the unmerged AlcaSkim streams.
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Merge subscriptions over the unmerged PromptSkim outputs; these use
        # ParentlessMergeBySize since the skim runs on already-merged input.
        unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                           "fakeSkimOut4", "fakeSkimOut5"]
        for unmergedOutput in unmergedOutputs:
            unmergedPromptSkim = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
            unmergedPromptSkim.loadData()
            promptSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput)
            promptSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedPromptSkim, workflow = promptSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions over every unmerged output.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            # NOTE(fix): this loop previously built the fileset and workflow
            # names from the stale 'unmergedOutput' left over from the merge
            # loop above, so it verified fakeSkimOut5 five times.
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"])

        # Log-collect subscriptions for the processing-level tasks.
        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                              task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        promptSkimLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive")
        promptSkimLogCollect.loadData()
        promptSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect")
        promptSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = promptSkimLogCollect, workflow = promptSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # Log-collect subscriptions for the merge tasks.
        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            # NOTE(fix): assert on the subscription loaded in this loop, not
            # the stale 'logCollectSub' from the section above.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                  task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            # NOTE(fix): assert on the freshly loaded subscription.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                            "fakeSkimOut4", "fakeSkimOut5"]
        for goldenOutputMod in goldenOutputMods:
            promptSkimMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod)
            promptSkimMergeLogCollect.loadData()
            promptSkimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                         task = "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            promptSkimMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = promptSkimMergeLogCollect, workflow = promptSkimMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            # NOTE(fix): assert on the freshly loaded subscription.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        return
class DBSUploadTest(unittest.TestCase):
    """
    _DBSUploadTest_

    TestCase for DBSUpload module

    Integration-style tests: they require a real database (via TestInit),
    a CouchDB instance (COUCHURL) and, for the tests tagged
    @attr('integration'), reachable local/global DBS endpoints.
    """

    # Maximum number of messages; kept for compatibility with the
    # message-service based test harness (not read inside this class).
    _maxMessage = 10

    def setUp(self):
        """
        _setUp_

        setUp function for unittest

        Creates the DBS3Buffer/Agent schema, a Couch config cache with a
        dummy PSet, registers file locations and a heartbeat component,
        and records the config document URL in self.configURL.
        """
        # Set constants
        self.couchDB = "config_test"
        self.configURL = "RANDOM;;URL;;NAME"
        self.configString = "This is a random string"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer",
                                               'WMCore.Agent.Database'],
                                useDefault=False)
        self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache")

        myThread = threading.currentThread()
        # Two DAO factories: the legacy DBSBuffer package and the DBS3 one.
        self.bufferFactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)
        self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                         logger=myThread.logger,
                                         dbinterface=myThread.dbi)

        # Register the locations the dummy files will be assigned to.
        locationAction = self.bufferFactory(classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="se1.cern.ch")
        locationAction.execute(siteName="se1.fnal.gov")
        locationAction.execute(siteName="malpaquet")

        # Set heartbeat
        self.componentName = 'JobSubmitter'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        # Set up a config cache
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDB)
        configCache.createUserGroup(groupname="testGroup", username='******')
        self.testDir = self.testInit.generateWorkDir()

        # Write a dummy PSet file so the config cache has a real attachment.
        psetPath = os.path.join(self.testDir, "PSet.txt")
        f = open(psetPath, 'w')
        f.write(self.configString)
        f.close()

        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()
        # "couchUrl;;couchDB;;docId" triple used as configContent in files.
        self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"],
                                         self.couchDB,
                                         configCache.getCouchID())
        return

    def tearDown(self):
        """
        _tearDown_

        tearDown function for unittest

        Drops the database schema, the Couch databases and the work dir.
        """
        self.testInit.clearDatabase()
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        return

    def createConfig(self):
        """
        _createConfig_

        This creates the actual config file used by the component

        Returns a Configuration with General/Agent/CoreDatabase/DBSUpload/
        DBSInterface/Alert sections filled with test values.
        """
        config = Configuration()

        #First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())
        config.section_("Agent")
        config.Agent.componentName = 'DBSUpload'
        config.Agent.useHeartbeat = False

        #Now the CoreDatabase information
        #This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.component_("DBSUpload")
        config.DBSUpload.pollInterval = 10
        config.DBSUpload.logLevel = 'ERROR'
        config.DBSUpload.maxThreads = 1
        config.DBSUpload.namespace = 'WMComponent.DBSUpload.DBSUpload'
        config.DBSUpload.componentDir = os.path.join(os.getcwd(), 'Components')
        config.DBSUpload.workerThreads = 4

        # Integration DBS endpoints used by the @attr('integration') tests.
        config.section_("DBSInterface")
        config.DBSInterface.globalDBSUrl = 'http://vocms09.cern.ch:8880/cms_dbs_int_local_xx_writer/servlet/DBSServlet'
        config.DBSInterface.globalDBSVersion = 'DBS_2_0_9'
        config.DBSInterface.DBSUrl = 'http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet'
        config.DBSInterface.DBSVersion = 'DBS_2_0_9'
        config.DBSInterface.MaxFilesToCommit = 10

        # addition for Alerts messaging framework, work (alerts) and control
        # channel addresses to which the component will be sending alerts
        # these are destination addresses where AlertProcessor:Receiver listens
        config.section_("Alert")
        config.Alert.address = "tcp://127.0.0.1:5557"
        config.Alert.controlAddr = "tcp://127.0.0.1:5559"
        # configure threshold of DBS upload queue size alert threshold
        # reference: trac ticket #1628
        config.DBSUpload.alertUploadQueueSize = 2000

        return config

    def injectWorkflow(self, workflowName='TestWorkflow',
                       taskPath='/TestWorkflow/ReadingEvents',
                       MaxWaitTime=10000, MaxFiles=10,
                       MaxEvents=250000000, MaxSize=9999999999):
        """
        _injectWorkflow_

        Inject a dummy workflow in DBSBuffer for testing,
        returns the workflow ID

        The Max* arguments are the per-workflow block-closing thresholds
        exercised by testG_closeSettingsPerWorkflow.
        """
        injectWorkflowDAO = self.buffer3Factory("InsertWorkflow")
        workflowID = injectWorkflowDAO.execute(workflowName, taskPath,
                                               MaxWaitTime, MaxFiles,
                                               MaxEvents, MaxSize)
        return workflowID

    def getFiles(self, name, tier, nFiles=12, site="malpaquet",
                 workflowName=None, taskPath=None, noChild=False):
        """
        Create some quick dummy test files

        Creates nFiles files in dataset /name/name/tier attached to a
        (possibly freshly injected) workflow, plus - unless noChild - one
        child file in /name/name_2/RECO whose parents are all of the above.
        Returns the list of parent DBSBufferFile objects (child excluded).
        """
        # Reuse an explicitly named workflow when both pieces are given,
        # otherwise inject the default dummy workflow.
        if workflowName is not None and taskPath is not None:
            workflowId = self.injectWorkflow(workflowName=workflowName,
                                             taskPath=taskPath)
        else:
            workflowId = self.injectWorkflow()

        files = []
        for f in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-%s-%i' % (name, site, f), size=1024,
                                     events=20, checksums={'cksum': 1},
                                     workflowId=workflowId)
            testFile.setAlgorithm(appName=name, appVer="CMSSW_3_1_1",
                                  appFam="RECO", psetHash="GIBBERISH",
                                  configContent=self.configURL)
            testFile.setDatasetPath("/%s/%s/%s" % (name, name, tier))
            # Each file gets a distinct lumi (f) in run 1.
            testFile.addRun(Run(1, *[f]))
            testFile.setGlobalTag("aGlobalTag")
            testFile.create()
            testFile.setLocation(site)
            files.append(testFile)

        if not noChild:
            testFileChild = DBSBufferFile(lfn='%s-%s-child' % (name, site), size=1024,
                                          events=10, checksums={'cksum': 1},
                                          workflowId=workflowId)
            testFileChild.setAlgorithm(appName=name, appVer="CMSSW_3_1_1",
                                       appFam="RECO", psetHash="GIBBERISH",
                                       configContent=self.configURL)
            testFileChild.setDatasetPath("/%s/%s_2/RECO" % (name, name))
            testFileChild.addRun(Run(1, *[45]))
            testFileChild.setGlobalTag("aGlobalTag")
            testFileChild.create()
            testFileChild.setLocation(site)
            # Child depends on every parent file created above.
            testFileChild.addParents([x['lfn'] for x in files])

        return files

    @attr('integration')
    def testA_basicUploadTest(self):
        """
        _basicUploadTest_

        Do everything simply once
        Create dataset, algo, files, blocks,
        upload them,
        mark as done, finish them, migrate them
        Also check the timeout
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        # MaxWaitTime=3s so the open block times out between poll cycles.
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('Open', )])

        # Check to see if datasets and algos are in local DBS
        result = listAlgorithms(apiRef=localAPI, patternExe=name)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['ExecutableName'], name)
        result = listPrimaryDatasets(apiRef=localAPI, match=name)
        self.assertEqual(result, [name])
        result = listProcessedDatasets(apiRef=localAPI, primary=name, dataTier="*")

        # Then check and see that the closed block made it into local DBS
        # NOTE(review): block ordering from DBS is not guaranteed, hence the
        # symmetric if/else on which block is the closed (10-file) one.
        affectedBlocks = listBlocks(apiRef=localAPI, datasetPath=datasetPath)
        if affectedBlocks[0]['OpenForWriting'] == '0':
            self.assertEqual(affectedBlocks[1]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 2)
        else:
            self.assertEqual(affectedBlocks[0]['OpenForWriting'], '1')
            self.assertEqual(affectedBlocks[1]['NumberOfFiles'], 10)
            self.assertEqual(affectedBlocks[0]['NumberOfFiles'], 2)

        # Check to make sure all the files are in local
        result = listDatasetFiles(apiRef=localAPI, datasetPath=datasetPath)
        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        # (listDatasetFiles raising is the expected "not there" signal)
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # There should be one blocks in global
        # It should have ten files and be closed
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        for block in result:
            self.assertEqual(block['OpenForWriting'], '0')
            self.assertTrue(block['NumberOfFiles'] in [2, 10])

        # Okay, deep breath. First round done
        # In the second round, the second block of the parent fileset should transfer
        # Make sure that the timeout functions work
        time.sleep(10)
        testDBSUpload.algorithm()
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        # Check to make sure all the files are in global
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Make sure the child files aren't there
        flag = False
        try:
            listDatasetFiles(apiRef=localAPI,
                             datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertTrue(flag)

        # Third round
        # Both of the parent blocks should have transferred
        # So the child block should now transfer
        testDBSUpload.algorithm()
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ), ('Open', )])

        # Child dataset must now be visible in local DBS with its one file.
        flag = False
        try:
            result = listDatasetFiles(apiRef=localAPI,
                                      datasetPath='/%s/%s_2/%s' % (name, name, tier))
        except Exception as ex:
            flag = True
        self.assertFalse(flag)
        self.assertEqual(len(result), 1)

        return

    @attr('integration')
    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch2-%i' % (name, i), size=1024,
                                     events=20, checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                  appFam=tier, psetHash="GIBBERISH_PART2",
                                  configContent=self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 2)

        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch3-%i' % (name, i), size=1024,
                                     events=20, checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName=name, appVer="CMSSW_3_1_1",
                                  appFam=tier, psetHash="GIBBERISH",
                                  configContent=self.configURL)
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI,
                            datasetPath='/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        # Well, all the blocks got there, so we're done
        return

    @attr('integration')
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks

        Uses the abortStepTwo/abortStepThree config flags to force the
        poller to fail mid-upload, then checks the buffer state recovers.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        # Saved so we can restore the streams at the end of the test.
        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)
        # The forced abort is expected to raise; the state checks below are
        # what this test is actually about.
        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Aborting in step two should result in no results
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 0)

        config.DBSUpload.abortStepTwo = False
        config.DBSUpload.abortStepThree = True
        testDBSUpload = DBSUploadPoller(config=config)

        try:
            testDBSUpload.algorithm()
        except Exception as ex:
            pass

        # Step-three abort leaves one block Pending and one Open,
        # with the files of the first dataset/algo back in READY.
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('Pending', ), ('Open', )])
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
        for res in result:
            self.assertEqual(res[0], 'READY')

        config.DBSUpload.abortStepThree = False
        self.injectWorkflow(MaxWaitTime=300)
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # After this, one block should have been uploaded, one should still be open
        # This is the result of the pending block updating, and the open block staying open
        result = myThread.dbi.processData("SELECT status, id FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', 3), ('Open', 4)])

        # Check that one block got there
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['NumberOfFiles'], 10)
        self.assertEqual(result[0]['NumberOfEvents'], 200)
        self.assertEqual(result[0]['BlockSize'], 10240)

        # Check that ten files got there
        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 10)

        # Force the block-close timeout to 1s so the open block closes.
        myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 1")
        testDBSUpload = DBSUploadPoller(config=config)
        time.sleep(3)
        testDBSUpload.algorithm()

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', )])

        result = listDatasetFiles(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 12)

        fileLFNs = [x['lfn'] for x in files]
        for lfn in fileLFNs:
            self.assertTrue(lfn in result)

        # Next pass picks up the child dataset into a new open block.
        testDBSUpload.algorithm()
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ), ('Open', )])

        time.sleep(5)
        testDBSUpload.algorithm()
        time.sleep(2)
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(result, [('InGlobalDBS', ), ('InGlobalDBS', ), ('InGlobalDBS', )])

        result = listDatasetFiles(apiRef=globeAPI,
                                  datasetPath='/%s/%s_2/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        sys.stdout = originalOut
        sys.stderr = originalErr

        return

    @attr('integration')
    def testD_Profile(self):
        """
        _Profile_

        Profile with cProfile and time various pieces
        """
        # NOTE: the early return below deliberately disables this profiling
        # test; remove it to profile the poller over 500 files.
        return
        config = self.createConfig()

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 500
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)
        cProfile.runctx("testDBSUpload.algorithm()", globals(), locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return

    @attr('integration')
    def testE_NoMigration(self):
        """
        _NoMigration_

        Test the DBSUpload system with no global migration

        With doGlobalMigration disabled, files should be marked GLOBAL
        directly after local upload.
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=3)
        config.DBSInterface.doGlobalMigration = False
        config.DBSUpload.pollInterval = 4

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # First, see if there are any blocks
        # One in DBS, one not in DBS
        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_block")[0].fetchall()
        self.assertEqual(len(result), 2)
        self.assertEqual(result, [('InGlobalDBS', ), ('Open', )])

        result = myThread.dbi.processData("SELECT status FROM dbsbuffer_file WHERE dataset_algo = 1")[0].fetchall()
        for r in result:
            self.assertEqual(r[0], 'GLOBAL')

        return

    @attr('integration')
    def testF_DBSUploadQueueSizeCheckForAlerts(self):
        """
        Test will not trigger a real alert being sent unless doing some
        mocking of the methods used during DBSUploadPoller.algorithm() ->
        DBSUploadPoller.uploadBlocks() method.
        As done here, it probably can't be deterministic, yet the feature
        shall be checked.
        """
        sizeLevelToTest = 1
        myThread = threading.currentThread()
        config = self.createConfig()
        # threshold / value to check
        config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

        # without this uploadBlocks method returns immediately
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        # One more file than the threshold, so the queue-size alert fires.
        nFiles = sizeLevelToTest + 1
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # load components that are necessary to check status
        # (this seems necessary, else some previous tests started failing)
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")
        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)
        testDBSUpload = DBSUploadPoller(config)
        # this is finally where the action (alert) should be triggered from
        testDBSUpload.algorithm()

        return

    def testG_closeSettingsPerWorkflow(self):
        """
        _closeSettingsPerWorkflow_

        Test our ability to close blocks depending on settings
        configured for individual workflows.
        This unit test that doesn't require an actual DBS instance to run.
        """
        # Deliberately fails first: the test needs a DBS2 mock that is
        # not available, so it is disabled at the top.
        self.assertTrue(False, 'This unit test disabled since we do not have DBS2 mock')
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSInterface.doGlobalMigration = False

        # First test, limit by number of files and timeout without new files
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        self.injectWorkflow(workflowName=name, taskPath='/%s/Test' % name,
                            MaxFiles=5)
        self.getFiles(name=name, tier=tier, nFiles=nFiles,
                      workflowName=name, taskPath='/%s/Test' % name)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        # Change the DBSUploadPoller imports on runtime
        from WMComponent.DBSUpload import DBSUploadPoller as MockDBSUploadPoller
        #MockDBSUploadPoller.DBSInterface = DBS2Interface

        # In the first round we should create blocks for the first dataset
        # The child dataset should not be handled until the parent is uploaded
        # First run creates 3 blocks, 2 are closed immediately and one is open
        testDBSUpload = MockDBSUploadPoller.DBSUploadPoller(config=config)
        testDBSUpload.algorithm()
        openBlocks = dbinterface.findOpenBlocks()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 1)
        self.assertEqual(len(closedBlocks), 2)
        globalFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'GLOBAL'")[0].fetchall()
        notUploadedFiles = myThread.dbi.processData("SELECT * FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'")[0].fetchall()
        self.assertEqual(len(globalFiles), 12)
        self.assertEqual(len(notUploadedFiles), 1)
        self.assertTrue('child' in notUploadedFiles[0][1])

        # Second pass uploads the child file and opens its block.
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 2)
        self.assertEqual(len(closedBlocks), 2)
        globalFiles = myThread.dbi.processData("SELECT id FROM dbsbuffer_file WHERE status = 'GLOBAL'")[0].fetchall()
        notUploadedFiles = myThread.dbi.processData("SELECT * FROM dbsbuffer_file WHERE status = 'NOTUPLOADED'")[0].fetchall()
        self.assertEqual(len(globalFiles), 13)
        self.assertEqual(len(notUploadedFiles), 0)

        # Test the timeout feature to close blocks
        myThread.dbi.processData("UPDATE dbsbuffer_workflow SET block_close_max_wait_time = 0")
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 0)
        self.assertEqual(len(closedBlocks), 4)

        # Check the information that DBS received
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            self.assertEqual(dbsBlock['OpenForWriting'], '0')
            self.assertTrue(dbsBlock['nFiles'] in (1, 2, 5))

        # Second test, limit by number of events and timeout with new files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 50
        self.injectWorkflow(workflowName=name, taskPath='/%s/Test' % name,
                            MaxFiles=45, MaxEvents=800, MaxWaitTime=10000)
        self.getFiles(name=name, tier=tier, nFiles=nFiles,
                      workflowName=name, taskPath='/%s/Test' % name)
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 2)
        self.assertEqual(len(closedBlocks), 5)

        # Throw 20 new file
        # Reset the timer such that the blocks appear to have been created 10001 seconds ago
        creationTime = int(time.time() - 10001)
        myThread.dbi.processData("UPDATE dbsbuffer_block SET create_time = %d WHERE status != 'InGlobalDBS'" % creationTime)
        self.getFiles(name=name + '2', tier=tier, nFiles=20,
                      workflowName=name, taskPath='/%s/Test' % name,
                      noChild=True)
        # Now a new block will have to be created as the last one timed out
        testDBSUpload.algorithm()
        openBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status != 'InGlobalDBS'")[0].fetchall()
        closedBlocks = myThread.dbi.processData("SELECT id FROM dbsbuffer_block WHERE status = 'InGlobalDBS'")[0].fetchall()
        self.assertEqual(len(openBlocks), 1)
        self.assertEqual(len(closedBlocks), 7)
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            if name in dbsBlockName:
                if dbsBlock['OpenForWriting'] == '1':
                    self.assertEqual(dbsBlock['nFiles'], 20)
                else:
                    self.assertTrue(dbsBlock['events'] in (10, 200, 800))
                    self.assertTrue(dbsBlock['nFiles'] in (1, 10, 40))

        # Last test, check limitation by size
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = 10
        self.injectWorkflow(workflowName=name, taskPath='/%s/Test' % name,
                            MaxFiles=45, MaxEvents=800, MaxSize=2048)
        self.getFiles(name=name, tier=tier, nFiles=nFiles,
                      workflowName=name, taskPath='/%s/Test' % name)
        testDBSUpload.algorithm()
        dbsBlocks = testDBSUpload.dbsInterface.blocks
        for dbsBlockName in dbsBlocks:
            dbsBlock = dbsBlocks[dbsBlockName]
            if name in dbsBlockName:
                self.assertEqual(dbsBlock['events'], 40)
                self.assertEqual(dbsBlock['nFiles'], 2)
                self.assertEqual(dbsBlock['size'], 2048)
        return
class RetryManagerTest(unittest.TestCase):
    """
    TestCase for the RetryManager component.

    Exercises the RetryManagerPoller against a live WMBS database/CouchDB:
    jobs are driven into the various *cooloff states and the poller is run
    with manipulated state timestamps to verify each retry algorithm's
    cooloff timing (Default, Squared, Exponential, Linear, Processing, Pause),
    including per-job-type algorithm selection.
    """

    def setUp(self):
        """
        Set up a clean WMBS schema, Couch job/fwjr dumps and the DAOs used
        to inspect and manipulate job state during the tests.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        self.testInit.setupCouch("retry_manager_t/jobs", "JobDump")
        self.testInit.setupCouch("retry_manager_t/fwjrs", "FWJRDump")

        self.daofactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        # DAOs for checking job state, faking state times and bumping retries
        self.getJobs = self.daofactory(classname = "Jobs.GetAllJobs")
        self.setJobTime = self.daofactory(classname = "Jobs.SetStateTime")
        self.increaseRetry = self.daofactory(classname = "Jobs.IncrementRetry")

        self.testDir = self.testInit.generateWorkDir()
        self.configFile = EmulatorSetup.setupWMAgentConfig()
        # Default number of jobs created per job group in most tests
        self.nJobs = 10
        return

    def tearDown(self):
        """
        Drop the database schema, the Couch databases and the work dir.
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        return

    def getConfig(self):
        """
        _getConfig_

        Build a minimal agent configuration with a RetryManager component
        using the DefaultRetryAlgo (120 s cooloff for create/submit/job),
        plus the JobStateMachine/ErrorHandler sections it depends on.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", self.testDir)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.component_("RetryManager")
        config.RetryManager.logLevel = 'DEBUG'
        config.RetryManager.namespace = 'WMComponent.RetryManager.RetryManager'
        config.RetryManager.pollInterval = 10
        # These are the cooloff times for the RetryManager, the times it waits
        # before attempting resubmission
        config.RetryManager.section_("DefaultRetryAlgo")
        config.RetryManager.DefaultRetryAlgo.section_("default")
        config.RetryManager.DefaultRetryAlgo.default.coolOffTime = {'create': 120, 'submit': 120, 'job': 120}
        # Path to plugin directory
        config.RetryManager.pluginPath = 'WMComponent.RetryManager.PlugIns'
        config.RetryManager.WMCoreBase = WMCore.WMBase.getWMBASE()
        config.RetryManager.componentDir = os.path.join(os.getcwd(), 'Components')

        # ErrorHandler
        # Not essential, but useful for ProcessingAlgo
        config.component_("ErrorHandler")
        config.ErrorHandler.maxRetries = 5

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = "retry_manager_t"

        return config

    def createTestJobGroup(self, nJobs, subType = "Processing", retryOnce = False):
        """
        _createTestJobGroup_

        Create a workflow/fileset/subscription of the given subType and
        attach a job group with nJobs jobs, each with a cache directory
        and two input files.  If retryOnce is set, bump every job's
        retry count by one.
        """
        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = subType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12312]))
        # Fixed copy-paste bug: this second setLocation() was previously
        # applied to testFileA again, leaving testFileB without a location.
        testFileB.setLocation('malpaquet')
        testFileA.create()
        testFileB.create()

        for _ in range(0, nJobs):
            testJob = Job(name = makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)

        testJobGroup.commit()
        if retryOnce:
            self.increaseRetry.execute(testJobGroup.jobs)
        return testJobGroup

    def testA_Create(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testCreate()

        Mimics creation of component and test jobs failed in create stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Below the 120 s default cooloff: nothing should be retried yet
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: all jobs should be retried (back to Created)
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testB_Submit(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testSubmit()

        Mimics creation of component and test jobs failed in submit stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Below the cooloff: jobs stay in SubmitCooloff
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs go back to Created for resubmission
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testC_Job(self):
        """
        WMComponent_t.RetryManager_t.RetryManager_t:testJob()

        Mimics creation of component and test jobs failed in execution stage.
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Below the cooloff: jobs stay in JobCooloff
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs get recreated
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testD_SquaredAlgo(self):
        """
        _testSquaredAlgo_

        Test the squared algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'SquaredAlgo'}
        config.RetryManager.section_("SquaredAlgo")
        config.RetryManager.SquaredAlgo.section_("Processing")
        config.RetryManager.SquaredAlgo.Processing.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Fail the jobs once so they are at retry_count 1 (cooloff 10*1^2)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Just below the squared cooloff: no state change expected
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs return to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

    def testE_ExponentialAlgo(self):
        """
        _testExponentialAlgo_

        Test the exponential algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'ExponentialAlgo'}
        config.RetryManager.section_("ExponentialAlgo")
        config.RetryManager.ExponentialAlgo.section_("Processing")
        config.RetryManager.ExponentialAlgo.Processing.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Fail the jobs once so they are at retry_count 1 (cooloff 10^1)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Just below the exponential cooloff: no state change expected
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs return to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

    def testF_LinearAlgo(self):
        """
        _testLinearAlgo_

        Test the linear algorithm to make sure it loads and works
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'LinearAlgo'}
        config.RetryManager.section_("LinearAlgo")
        config.RetryManager.LinearAlgo.section_("Processing")
        config.RetryManager.LinearAlgo.Processing.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # Fail the jobs once so they are at retry_count 1 (cooloff 10*1)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'submitcooloff')
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Just below the linear cooloff: no state change expected
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 5)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs return to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 12)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)
        return

    def testG_ProcessingAlgo(self):
        """
        _ProcessingAlgo_

        Test for the ProcessingAlgo Prototype
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'ProcessingAlgo'}
        config.RetryManager.section_("ProcessingAlgo")
        config.RetryManager.ProcessingAlgo.section_("default")
        config.RetryManager.ProcessingAlgo.default.coolOffTime = {'create': 10, 'submit': 10, 'job': 10}
        changer = ChangeState(config)
        # ProcessingAlgo inspects the job's framework job report, so give
        # each job a real (bad) fwjr from the JobAccountant fixtures
        fwjrPath = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t",
                                "fwjrs/badBackfillJobReport.pkl")
        report = Report()
        report.load(fwjrPath)
        for job in testJobGroup.jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'], "Report.%i.pkl" % job['retry_count']))
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.algorithm()

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail the jobs a second time
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 1)
            report.save(os.path.join(j['cache_dir'], "Report.%i.pkl" % j['retry_count']))

        # With the fwjr's error code in OneMoreErrorCodes the algo should
        # fast-forward the retry count to maxRetries (5)
        config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = [8020]
        testRetryManager2 = RetryManagerPoller(config)
        testRetryManager2.algorithm()

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 5)

        # Now test timeout
        testJobGroup2 = self.createTestJobGroup(nJobs = self.nJobs)

        # Cycle jobs
        for job in testJobGroup2.jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'], "Report.%i.pkl" % job['retry_count']))
        changer.propagate(testJobGroup2.jobs, 'created', 'new')
        changer.propagate(testJobGroup2.jobs, 'executing', 'created')
        changer.propagate(testJobGroup2.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup2.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup2.jobs, 'jobcooloff', 'jobfailed')
        for job in testJobGroup2.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 0)

        # Jobs whose wallclock time exceeds MaxRunTime should also be
        # pushed straight to maxRetries
        config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = []
        config.RetryManager.ProcessingAlgo.default.MaxRunTime = 1
        testRetryManager3 = RetryManagerPoller(config)
        testRetryManager3.algorithm()

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs * 2)

        for job in testJobGroup2.jobs:
            j = Job(id = job['id'])
            j.load()
            self.assertEqual(j['retry_count'], 5)

        return

    def testH_PauseAlgo(self):
        """
        _testH_PauseAlgo_

        Test the pause algorithm, note that given pauseCount = n, the
        job will run first n + 1 times before being paused.
        After that it will be paused each n times
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'PauseAlgo'}
        config.RetryManager.section_("PauseAlgo")
        config.RetryManager.PauseAlgo.section_("Processing")
        config.RetryManager.PauseAlgo.Processing.coolOffTime = {'create': 20, 'submit': 20, 'job': 20}
        config.RetryManager.PauseAlgo.Processing.pauseCount = 2
        changer = ChangeState(config)
        # Fail the jobs once and retry, so they sit in cooloff at retry 1
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Making sure that jobs are not created ahead of time
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 15)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 25)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        # Make sure that no change happens before timeout
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 75)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        # Giving time so they can be paused
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 85)

        # Make sure that the plugin pauses them
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'jobpaused')
        self.assertEqual(len(idList), self.nJobs)

        # Emulating ops retrying the job
        changer.propagate(testJobGroup.jobs, 'created', 'jobpaused')

        # Making sure it did the right thing
        idList = self.getJobs.execute(state = 'created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 175)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'JobCoolOff')
        self.assertEqual(len(idList), self.nJobs)

        # Giving time so they can be retried
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 185)

        # Make sure that the plugin allowed them to go back to created state
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'created')
        self.assertEqual(len(idList), self.nJobs)

        # Fail them out again
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 315)
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'jobcooloff')
        self.assertEqual(len(idList), self.nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 325)

        # Jobs should be paused again on the next pauseCount boundary
        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'jobpaused')
        self.assertEqual(len(idList), self.nJobs)

        return

    def testI_MultipleJobTypes(self):
        """
        _testI_MultipleJobTypes_

        Check that we can configure different retry algorithms for different
        job types, including a default for nonspecified types.
        Also check that two job types can share the same retry algorithm
        but with different parameters
        """
        # Let's create 4 job groups
        processingJobGroup = self.createTestJobGroup(nJobs = 10,
                                                     retryOnce = True)
        productionJobGroup = self.createTestJobGroup(nJobs = 15,
                                                     subType = "Production",
                                                     retryOnce = True)
        mergeJobGroup = self.createTestJobGroup(nJobs = 20,
                                                subType = "Merge",
                                                retryOnce = True)
        skimJobGroup = self.createTestJobGroup(nJobs = 5,
                                               subType = "Skim",
                                               retryOnce = True)

        # Set an adequate config
        # Processing jobs get the PauseAlgo with pauseCount 4
        # Production jobs get the ExponentialAlgo
        # Merge jobs get the PauseAlgo but with pauseCount 2 which is the default
        # Skim jobs are not configured, so they get the default SquaredAlgo
        config = self.getConfig()
        config.RetryManager.plugins = {'Processing' : 'PauseAlgo',
                                       'Production' : 'ExponentialAlgo',
                                       'Merge' : 'PauseAlgo',
                                       'default' : 'SquaredAlgo'}
        config.RetryManager.section_("PauseAlgo")
        config.RetryManager.PauseAlgo.section_("Processing")
        config.RetryManager.PauseAlgo.Processing.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.PauseAlgo.Processing.pauseCount = 4
        config.RetryManager.PauseAlgo.section_("default")
        config.RetryManager.PauseAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}
        config.RetryManager.PauseAlgo.default.pauseCount = 2
        config.RetryManager.section_("ExponentialAlgo")
        config.RetryManager.ExponentialAlgo.section_("Production")
        config.RetryManager.ExponentialAlgo.Production.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.ExponentialAlgo.section_("default")
        config.RetryManager.ExponentialAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}
        config.RetryManager.section_("SquaredAlgo")
        config.RetryManager.SquaredAlgo.section_("Skim")
        config.RetryManager.SquaredAlgo.Skim.coolOffTime = {'create': 30, 'submit': 30, 'job': 30}
        config.RetryManager.SquaredAlgo.section_("default")
        config.RetryManager.SquaredAlgo.default.coolOffTime = {'create': 60, 'submit': 60, 'job': 60}

        # Start the state changer and RetryManager
        changer = ChangeState(config)
        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Create the jobs for the first time
        changer.propagate(processingJobGroup.jobs, 'created', 'new')

        # Let's start with the processing jobs and the pauseAlgo
        for count in range(1, 5):
            # Fail the jobs
            changer.propagate(processingJobGroup.jobs, 'executing', 'created')
            changer.propagate(processingJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(processingJobGroup.jobs, 'jobcooloff', 'jobfailed')

            # Check that the cooloff time is strictly enforced
            # First a job time just below the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'JobCoolOff')
            self.assertEqual(len(idList), len(processingJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")

            # Now above the cooloff time
            for job in processingJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)

            # Make sure the jobs get created again or go to paused
            if count < 4:
                idList = self.getJobs.execute(state = 'created')
            else:
                idList = self.getJobs.execute(state = 'jobpaused')
            self.assertEqual(len(idList), len(processingJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Unpause them so they don't interfere with subsequent tests
        changer.propagate(processingJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(processingJobGroup.jobs, 'executing', 'created')

        # Now the production jobs and the exponential algo
        changer.propagate(productionJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(productionJobGroup.jobs, 'executing', 'created')
            changer.propagate(productionJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(productionJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # Below the exponential cooloff (30^count): still cooling off
            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - pow(30, count) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'JobCoolOff')
            self.assertEqual(len(idList), len(productionJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            # Above the cooloff: jobs are recreated
            for job in productionJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - pow(30, count) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'created')
            self.assertEqual(len(idList), len(productionJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Send them to executing
        changer.propagate(productionJobGroup.jobs, 'executing', 'created')

        # Now the merge jobs and the paused algo with different parameters
        changer.propagate(mergeJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(mergeJobGroup.jobs, 'executing', 'created')
            changer.propagate(mergeJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(mergeJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # Below the default (60 s base) cooloff: still cooling off
            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'JobCoolOff')
            self.assertEqual(len(idList), len(mergeJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            # Above the cooloff: created again, or paused at pauseCount 2
            for job in mergeJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 60 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            if count < 2:
                idList = self.getJobs.execute(state = 'created')
            else:
                idList = self.getJobs.execute(state = 'jobpaused')
            self.assertEqual(len(idList), len(mergeJobGroup.jobs),
                             "Jobs didn't change state correctly")

        # Send them to executing
        changer.propagate(mergeJobGroup.jobs, 'created', 'jobpaused')
        changer.propagate(mergeJobGroup.jobs, 'executing', 'created')

        # Now the skim jobs and the squared algo
        changer.propagate(skimJobGroup.jobs, 'created', 'new')
        for count in range(1, 3):
            changer.propagate(skimJobGroup.jobs, 'executing', 'created')
            changer.propagate(skimJobGroup.jobs, 'jobfailed', 'executing')
            changer.propagate(skimJobGroup.jobs, 'jobcooloff', 'jobfailed')
            # Below the squared cooloff (30 * count^2): still cooling off
            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 30 * pow(count, 2) + 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'JobCoolOff')
            self.assertEqual(len(idList), len(skimJobGroup.jobs),
                             "Jobs went into cooloff without the proper timing")
            # Above the cooloff: jobs are recreated
            for job in skimJobGroup.jobs:
                self.setJobTime.execute(jobID = job["id"],
                                        stateTime = int(time.time()) - 30 * pow(count, 2) - 5)
            testRetryManager.algorithm(None)
            idList = self.getJobs.execute(state = 'created')
            self.assertEqual(len(idList), len(skimJobGroup.jobs),
                             "Jobs didn't change state correctly")

    def testY_MultipleIterations(self):
        """
        _MultipleIterations_

        Paranoia based check to see if I'm saving class instances correctly
        """
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'Created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        # Below the cooloff: nothing moves
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), self.nJobs)

        # Above the cooloff: jobs go back to Created
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs)

        # Make a new jobGroup for a second run
        testJobGroup = self.createTestJobGroup(nJobs = self.nJobs)

        # Set job state
        changer.propagate(testJobGroup.jobs, 'submitfailed', 'created')
        changer.propagate(testJobGroup.jobs, 'submitcooloff', 'submitfailed')

        # Set them to go off
        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 200)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'SubmitCooloff')
        self.assertEqual(len(idList), 0)

        # Both the original and the new group should now be in Created
        idList = self.getJobs.execute(state = 'Created')
        self.assertEqual(len(idList), self.nJobs * 2)
        return

    def testZ_Profile(self):
        """
        _Profile_

        Do a basic profiling of the algo
        """
        # NOTE: this test is deliberately disabled by the early return below;
        # remove it (and uncomment the cProfile line) to run the profiling.
        return
        import cProfile, pstats

        nJobs = 1000

        testJobGroup = self.createTestJobGroup(nJobs = nJobs)

        config = self.getConfig()
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'createfailed', 'new')
        changer.propagate(testJobGroup.jobs, 'createcooloff', 'createfailed')

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), nJobs)

        testRetryManager = RetryManagerPoller(config)
        testRetryManager.setup(None)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 50)

        testRetryManager.algorithm(None)
        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), nJobs)

        for job in testJobGroup.jobs:
            self.setJobTime.execute(jobID = job["id"],
                                    stateTime = int(time.time()) - 150)

        startTime = time.time()
        #cProfile.runctx("testRetryManager.algorithm()", globals(), locals(), filename = "profStats.stat")
        testRetryManager.algorithm(None)
        stopTime = time.time()

        idList = self.getJobs.execute(state = 'CreateCooloff')
        self.assertEqual(len(idList), 0)

        idList = self.getJobs.execute(state = 'New')
        self.assertEqual(len(idList), nJobs)

        print("Took %f seconds to run polling algo" % (stopTime - startTime))

        p = pstats.Stats('profStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return
class JobArchiverTest(EmulatedUnitTestCase):
    """
    TestCase for the JobArchiver component.

    Runs JobArchiverPoller against a real WMBS schema plus a pair of couch
    databases (job dump / fwjr dump) and checks that finished jobs are moved
    to 'cleanout', their cache directories removed and their logs tarred up.
    """

    # Historical upper bound for message counts; kept for backward
    # compatibility with harness code that may still read it.
    _maxMessage = 10

    def setUp(self):
        """
        Set up the WMBS schema, couch databases and a scratch work dir.
        """
        # BUGFIX: super().setUp() used to be invoked twice here; once is enough.
        super(JobArchiverTest, self).setUp()
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # self.tearDown()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        self.testInit.setupCouch("jobarchiver_t_0/jobs", "JobDump")
        self.testInit.setupCouch("jobarchiver_t_0/fwjrs", "FWJRDump")

        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daofactory(classname="Jobs.GetAllJobs")

        # Keep the dir around after the test so tarballs can be inspected.
        self.testDir = self.testInit.generateWorkDir(deleteOnDestruction=False)

        self.nJobs = 10

        self.configFile = EmulatorSetup.setupWMAgentConfig()
        return

    def tearDown(self):
        """
        Drop the WMBS schema, couch databases, work dir and emulator config.
        """
        self.testInit.clearDatabase(modules=["WMCore.WMBS"])
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        EmulatorSetup.deleteConfig(self.configFile)
        super(JobArchiverTest, self).tearDown()
        return

    def getConfig(self):
        """
        _createConfig_

        Build a minimal agent configuration for the JobArchiver component.

        :returns: a Configuration object with General, CoreDatabase,
            JobStateMachine, JobArchiver and WorkQueueManager sections.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # First the general stuff
        config.section_("General")
        # Both spellings are set on purpose: different consumers historically
        # read 'workDir' and 'WorkDir' -- TODO confirm before dropping either.
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())
        config.General.WorkDir = os.getenv("TESTDIR", os.getcwd())

        # Now the CoreDatabase information
        # This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL",
                                                    "cmssrv48.fnal.gov:5984")
        config.JobStateMachine.couchDBName = "jobarchiver_t_0"

        config.component_("JobArchiver")
        config.JobArchiver.pollInterval = 60
        config.JobArchiver.logLevel = 'INFO'
        # config.JobArchiver.logDir = os.path.join(self.testDir, 'logs')
        config.JobArchiver.componentDir = self.testDir
        config.JobArchiver.numberOfJobsToCluster = 1000

        config.component_('WorkQueueManager')
        config.WorkQueueManager.namespace = "WMComponent.WorkQueueManager.WorkQueueManager"
        config.WorkQueueManager.componentDir = config.General.workDir + "/WorkQueueManager"
        config.WorkQueueManager.level = 'LocalQueue'
        config.WorkQueueManager.logLevel = 'DEBUG'
        config.WorkQueueManager.couchurl = 'https://None'
        config.WorkQueueManager.dbname = 'whatever'
        config.WorkQueueManager.inboxDatabase = 'whatever2'
        config.WorkQueueManager.queueParams = {}
        config.WorkQueueManager.queueParams["ParentQueueCouchUrl"] = "https://cmsweb.cern.ch/couchdb/workqueue"

        return config

    def createTestJobGroup(self):
        """
        Creates a group of several jobs

        Builds one workflow/fileset/subscription and self.nJobs jobs that all
        share the same two input files.

        :returns: the committed JobGroup.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        # BUGFIX: this line used to re-set the location of testFileA (a
        # copy-paste slip), leaving testFileB without any location.
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for _ in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJobGroup.add(testJob)

        testJobGroup.commit()
        return testJobGroup

    def testBasicFunctionTest(self):
        """
        _BasicFunctionTest_

        Tests the components, by seeing if they can process a simple set of
        closeouts: jobs run through to 'success', the poller must mark them
        'cleanout', remove their caches and produce per-job log tarballs.
        """
        myThread = threading.currentThread()

        config = self.getConfig()

        testJobGroup = self.createTestJobGroup()

        changer = ChangeState(config)

        cacheDir = os.path.join(self.testDir, 'test')
        if not os.path.isdir(cacheDir):
            os.mkdir(cacheDir)

        # if os.path.isdir(config.JobArchiver.logDir):
        #     shutil.rmtree(config.JobArchiver.logDir)

        # Mark every job successful and give it a cache dir containing one
        # log file named after the job.
        for job in testJobGroup.jobs:
            myThread.transaction.begin()
            job["outcome"] = "success"
            job.save()
            myThread.transaction.commit()
            path = os.path.join(cacheDir, job['name'])
            os.makedirs(path)
            with open('%s/%s.out' % (path, job['name']), 'w') as f:
                f.write(job['name'])
            job.setCache(path)

        # Walk the jobs through the state machine to a terminal 'success'.
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'success', 'complete')

        testJobArchiver = JobArchiverPoller(config=config)
        testJobArchiver.algorithm()

        # Every job in the DB should now be in 'cleanout'.
        result = myThread.dbi.processData(
            "SELECT wmbs_job_state.name FROM wmbs_job_state INNER JOIN wmbs_job ON wmbs_job.state = wmbs_job_state.id")[
            0].fetchall()
        for val in result:
            self.assertEqual(val.values(), ['cleanout'])

        # Job cache directories must have been removed.
        dirList = os.listdir(cacheDir)
        for job in testJobGroup.jobs:
            self.assertEqual(job["name"] in dirList, False)

        # Each job's logs must have been tarred into the component's logDir.
        logPath = os.path.join(config.JobArchiver.componentDir, 'logDir', 'w',
                               'wf001', 'JobCluster_0')
        logList = os.listdir(logPath)
        for job in testJobGroup.jobs:
            self.assertEqual('Job_%i.tar.bz2' % (job['id']) in logList, True,
                            'Could not find transferred tarball for job %i' % (job['id']))
            # Unpack the tarball and check the log file survived intact.
            pipe = Popen(['tar', '-jxvf',
                          os.path.join(logPath, 'Job_%i.tar.bz2' % (job['id']))],
                         stdout=PIPE, stderr=PIPE, shell=False)
            pipe.wait()
            # filename = '%s/%s/%s.out' %(cacheDir[1:], job['name'], job['name'])
            filename = 'Job_%i/%s.out' % (job['id'], job['name'])
            self.assertEqual(os.path.isfile(filename), True,
                             'Could not find file %s' % (filename))
            with open(filename, 'r') as f:
                fileContents = f.readlines()
            self.assertEqual(fileContents[0].find(job['name']) > -1, True)
            shutil.rmtree('Job_%i' % (job['id']))
            if os.path.isfile('Job_%i.tar.bz2' % (job['id'])):
                os.remove('Job_%i.tar.bz2' % (job['id']))

        return

    @attr('integration')
    def testSpeedTest(self):
        """
        _SpeedTest_

        Tests the components, as in sees if they load.
        Otherwise does nothing.
        """
        return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=[ "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl" ], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. 
""" myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") 
self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] 
= "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual( list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = 
self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. 
""" testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") 
skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, seName=self.siteDB.cmsNametoSE(site)[0]) self.ses.append(self.siteDB.cmsNametoSE(site)[0]) def createWMSpec(self, name='ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites=[], nonCustodialSites=[], autoApproveSites=[], priority="Low", custodialSubType="Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, 
wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config=config) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. 
""" resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") 
self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output 
fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") 
resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: 
Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = 
self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [ '123454321' ] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [ '123,123' ] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. 
When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ block = self.dataset + "#1" wmbs, sub, numFiles = self.createWMBSHelperWithTopTask( self.wmspec, block, parentFlag=False, detail=True) # file creation without parents self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # no parent per child self.assertEqual(len(child["parents"]), 0) wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # one parent per child self.assertEqual(len(child["parents"]), 1) def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(self.ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, 
mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class JobTrackerTest(EmulatedUnitTestCase):
    """
    TestCase for TestJobTracker module.

    Sets up a WMBS/BossAir/ResourceControl schema plus couch job/fwjr dump
    databases and exercises JobTrackerPoller; the condor tests are tagged
    'integration' because they require a working condor queue.
    """
    # NOTE(review): _maxMessage is not referenced in this chunk — presumably a
    # message-handling limit used elsewhere; confirm before removing.
    _maxMessage = 10

    def setUp(self):
        """
        setup for test.

        Create the WMBS/BossAir/ResourceControl schema, the couch dump
        databases, a 'malpaquet' test site with thresholds, a test user,
        a work directory and a WMAgent config file.
        """
        super(JobTrackerTest, self).setUp()
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl"])
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl"],
                                useDefault=False)
        self.testInit.setupCouch("jobtracker_t/jobs", "JobDump")
        self.testInit.setupCouch("jobtracker_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.getJobs = self.daoFactory(classname="Jobs.GetAllJobs")

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='malpaquet', pnn='se.malpaquet',
                                   ceName='malpaquet', plugin="CondorPlugin")
        resourceControl.insertThreshold(siteName='malpaquet', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="malpaquet", pnn="malpaquet",
                               ceName="malpaquet", plugin="CondorPlugin")

        # Create user
        newuser = self.daoFactory(classname="Users.New")
        newuser.execute(dn="jchurchill")

        # We actually need the user name
        self.user = getpass.getuser()

        self.testDir = self.testInit.generateWorkDir()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

    def tearDown(self):
        """
        Database deletion
        """
        self.testInit.clearDatabase(modules=["WMCore.WMBS",
                                             "WMCore.BossAir",
                                             "WMCore.ResourceControl"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        return

    def getConfig(self):
        """
        _getConfig_

        Build a basic JobTracker config: Agent/CoreDatabase sections plus the
        JobTracker, JobSubmitter, BossAir and JobStateMachine components.
        Database/couch endpoints are pulled from the environment.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("Agent")
        config.Agent.agentName = 'testAgent'

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = 'INFO'
        config.JobTracker.pollInterval = 10
        config.JobTracker.trackerName = 'CondorTracker'
        config.JobTracker.pluginDir = 'WMComponent.JobTracker.Plugins'
        config.JobTracker.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobTracker.runTimeLimit = 7776000  # Jobs expire after 90 days
        config.JobTracker.idleTimeLimit = 7776000
        config.JobTracker.heldTimeLimit = 7776000
        config.JobTracker.unknTimeLimit = 7776000

        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'INFO'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.pluginName = 'AirPlugin'
        config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins'
        config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit')
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", 'badtest.fnal.gov')
        # config.JobSubmitter.submitScript = os.path.join(os.getcwd(), 'submit.sh')
        config.JobSubmitter.submitScript = os.path.join(
            WMCore.WMInit.getWMBASE(),
            'test/python/WMComponent_t/JobSubmitter_t', 'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), 'config.cfg')

        # BossAir
        config.component_("BossAir")
        config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL',
                                                    'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = "jobtracker_t"

        return config

    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs: one workflow/fileset/subscription/jobgroup, a
        single input file located at 'malpaquet', and nJobs jobs sharing that
        file, each with its cache set to cacheDir.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name='%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup

    @attr('integration')
    def testA_CondorTest(self):
        """
        _CondorTest_

        Because I don't want this test to be submitter dependent:
        Create a dummy condor job.
        Submit a dummy condor job.
        Track it.
        Kill it.
        Exit
        """
        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0,
                         "User currently has %i running jobs. Test will not continue" % (nRunning))

        nJobs = 10

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs=nJobs, cacheDir=cacheDir)

        # Propagate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # First iteration
        # There are no jobs in the tracker,
        # The tracker should register the jobs as missing
        # This should tell it that they've finished
        # So the tracker should send them onwards
        jobTracker.algorithm()

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state='complete', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Second iteration
        # Reset the jobs
        # This time submit them to the queue
        # The jobs should remain in holding
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        # Create a submit script
        createSubmitScript(submitDir)

        # Dummy package and sandbox files; only their existence matters.
        jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
        with open(jobPackage, 'w') as f:
            f.write(' ')

        sandbox = os.path.join(self.testDir, 'sandbox.box')
        with open(sandbox, 'w') as f:
            f.write(' ')

        for job in testJobGroup.jobs:
            job['plugin'] = 'CondorPlugin'
            job['userdn'] = 'jchurchill'
            job['custom'] = {'location': 'malpaquet'}
            job['cache_dir'] = self.testDir
            job['sandbox'] = sandbox
            job['packageDir'] = self.testDir

        info = {}
        info['packageDir'] = self.testDir
        info['index'] = 0
        info['sandbox'] = sandbox

        jobTracker.bossAir.submit(jobs=testJobGroup.jobs, info=info)

        time.sleep(1)

        # All jobs should be running
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Run the algorithm.  After this
        # all jobs should still be running
        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), 0)

        # Are jobs still in the condor_q
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Then we're done
        jobTracker.bossAir.kill(jobs=testJobGroup.jobs)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), 0)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), nJobs)

        # This is optional if you want to look at what
        # files were actually created during running
        # if os.path.isdir('testDir'):
        #     shutil.rmtree('testDir')
        # shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

        return

    @attr('integration')
    def testB_ReallyLongTest(self):
        """
        _ReallyLongTest_

        Run a really long test using the condor plugin.  Submits half the
        jobs straight to condor via condor_submit, profiles one tracker
        iteration with cProfile and prints the timing statistics.
        """
        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0,
                         "User currently has %i running jobs. Test will not continue" % (nRunning))

        nJobs = 500
        jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs=nJobs, cacheDir=cacheDir)

        # Propagate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # Now create some jobs
        for job in testJobGroup.jobs[:(nJobs // 2)]:
            jdl = createJDL(jobID=job['id'], directory=submitDir, jobCE=jobCE)
            jdlFile = os.path.join(submitDir, 'condorJDL_%i.jdl' % (job['id']))
            with open(jdlFile, 'w') as handle:
                handle.writelines(jdl)

            command = ["condor_submit", jdlFile]
            pipe = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, shell=False)
            pipe.communicate()

        startTime = time.time()
        cProfile.runctx("jobTracker.algorithm()", globals(), locals(),
                        filename="testStats.stat")
        # jobTracker.algorithm()
        stopTime = time.time()

        # Are jobs in the right state?
        # Only the submitted half stays Executing; the rest are marked done.
        result = self.getJobs.execute(state='Executing', jobType="Processing")
        self.assertEqual(len(result), nJobs // 2)

        result = self.getJobs.execute(state='Complete', jobType="Processing")
        self.assertEqual(len(result), nJobs // 2)

        # Then we're done
        killList = [x['id'] for x in testJobGroup.jobs]
        jobTracker.killJobs(jobList=killList)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        print("Process took %f seconds to process %i classAds" %
              ((stopTime - startTime), nJobs // 2))

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
class JobSubmitterCachingTest(EmulatedUnitTestCase):
    """
    Tests for the JobSubmitterPoller job-data cache: inject jobs for two
    workflows, then verify that killing each workflow removes its jobs from
    the submitter cache.
    """

    def setUp(self):
        """
        _setUp_

        Set everything up: WMBS/BossAir/ResourceControl schema, couch dump
        databases, two sites in ResourceControl, a work directory and a
        WMAgent config file.
        """
        super(JobSubmitterCachingTest, self).setUp()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl"],
                                useDefault=False)
        self.testInit.setupCouch("jobsubmittercaching_t/jobs", "JobDump")
        self.testInit.setupCouch("jobsubmittercaching_t/fwjrs", "FWJRDump")

        resourceControl = ResourceControl()
        for siteName in ["T1_US_FNAL", "T1_UK_RAL"]:
            resourceControl.insertSite(siteName=siteName,
                                       pnn="%s_Disk" % (siteName),
                                       ceName=siteName,
                                       plugin="SimpleCondorPlugin",
                                       cmsName=siteName)
            resourceControl.insertThreshold(siteName=siteName,
                                            taskType="Processing",
                                            maxSlots=10000,
                                            pendingSlots=10000)

        self.testDir = self.testInit.generateWorkDir()
        self.configFile = EmulatorSetup.setupWMAgentConfig()
        return

    def tearDown(self):
        """
        _tearDown_

        Tear everything down.
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        return

    def createConfig(self):
        """
        _createConfig_

        Create a config for the JobSubmitter.  These parameters are still
        pulled from the environment.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.component_("Agent")
        config.Agent.isDocker = False

        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = os.getenv("COUCHURL")
        config.JobStateMachine.couchDBName = "jobsubmittercaching_t"

        config.section_("BossAir")
        config.BossAir.pluginDir = "WMCore.BossAir.Plugins"
        config.BossAir.pluginNames = ["SimpleCondorPlugin"]
        config.BossAir.nCondorProcesses = 1

        config.component_("JobSubmitter")
        config.JobSubmitter.submitDir = self.testDir
        config.JobSubmitter.submitScript = os.path.join(
            getTestBase(), 'WMComponent_t/JobSubmitter_t', 'submit.sh')

        return config

    def injectJobs(self):
        """
        _injectJobs_

        Inject two workflows into WMBS and save the job objects to disk.
        Ten 'A' jobs belong to wf001 (runnable at T1_US_FNAL) and ten 'B'
        jobs belong to wf002 (runnable at T1_UK_RAL); each job is pickled
        into its own cache directory for the submitter to pick up.
        """
        testWorkflowA = Workflow(spec="specA.pkl", owner="Steve",
                                 name="wf001", task="TestTaskA")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec="specB.pkl", owner="Steve",
                                 name="wf002", task="TestTaskB")
        testWorkflowB.create()

        testFileset = Fileset("testFileset")
        testFileset.create()
        testSubA = Subscription(fileset=testFileset, workflow=testWorkflowA)
        testSubA.create()
        testSubB = Subscription(fileset=testFileset, workflow=testWorkflowB)
        testSubB.create()

        testGroupA = JobGroup(subscription=testSubA)
        testGroupA.create()
        testGroupB = JobGroup(subscription=testSubB)
        testGroupB.create()

        stateChanger = ChangeState(self.createConfig(),
                                   "jobsubmittercaching_t")

        for i in range(10):
            newFile = File(lfn="testFile%s" % i,
                           locations=set(["se.T1_US_FNAL", "se.T1_UK_RAL"]))
            newFile.create()

            newJobA = Job(name="testJobA-%s" % i, files=[newFile])
            newJobA["workflow"] = "wf001"
            newJobA["possiblePSN"] = ["T1_US_FNAL"]
            newJobA["sandbox"] = "%s/somesandbox" % self.testDir
            newJobA["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir, "jobA-%s" % i)
            os.mkdir(jobCacheDir)
            newJobA["cache_dir"] = jobCacheDir
            newJobA["type"] = "Processing"
            newJobA['requestType'] = 'ReReco'
            newJobA.create(testGroupA)

            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJobA, jobHandle)

            stateChanger.propagate([newJobA], "created", "new")

            newJobB = Job(name="testJobB-%s" % i, files=[newFile])
            # Fixed: the B jobs belong to testWorkflowB ("wf002"); they were
            # mislabeled "wf001", which contradicted testCaching's expectation
            # that killing wf002 removes them.
            newJobB["workflow"] = "wf002"
            newJobB["possiblePSN"] = ["T1_UK_RAL"]
            newJobB["sandbox"] = "%s/somesandbox" % self.testDir
            newJobB["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir, "jobB-%s" % i)
            os.mkdir(jobCacheDir)
            newJobB["cache_dir"] = jobCacheDir
            newJobB["type"] = "Processing"
            newJobB['requestType'] = 'ReReco'
            newJobB.create(testGroupB)

            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJobB, jobHandle)

            stateChanger.propagate([newJobB], "created", "new")

        return

    def testCaching(self):
        """
        _testCaching_

        Verify that JobSubmitter caching works: the cache starts empty,
        fills to 20 after injection, and shrinks by 10 as each workflow
        is killed.
        """
        config = self.createConfig()
        mySubmitterPoller = JobSubmitterPoller(config)
        mySubmitterPoller.getThresholds()
        mySubmitterPoller.refreshCache()

        self.assertEqual(len(mySubmitterPoller.jobDataCache), 0,
                         "Error: The job cache should be empty.")

        self.injectJobs()
        mySubmitterPoller.refreshCache()

        # Verify the cache is full
        self.assertEqual(
            len(mySubmitterPoller.jobDataCache), 20,
            "Error: The job cache should contain 20 jobs. Contains: %i" %
            len(mySubmitterPoller.jobDataCache))

        killWorkflow("wf001", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(
            len(mySubmitterPoller.jobDataCache), 10,
            "Error: The job cache should contain 10 jobs. Contains: %i" %
            len(mySubmitterPoller.jobDataCache))

        killWorkflow("wf002", jobCouchConfig=config)
        mySubmitterPoller.refreshCache()

        # Verify that the workflow is gone from the cache
        self.assertEqual(
            len(mySubmitterPoller.jobDataCache), 0,
            "Error: The job cache should be empty. Contains: %i" %
            len(mySubmitterPoller.jobDataCache))

        return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBSBuffer.Database", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault = False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = threading.currentThread().logger, dbinterface = threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI = None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. 
""" myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daoFactory(classname = "Locations.New") changeStateAction = daoFactory(classname = "Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000) inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations = "goodse.cern.ch") inputFileB = File("lfnB", locations = "goodse.cern.ch") inputFileC = File("lfnC", locations = "goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations = "goodse.cern.ch") unmergedFileB = File("ulfnB", locations = "goodse.cern.ch") unmergedFileC = File("ulfnC", locations = "goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset = inputFileset, workflow = mainProcWorkflow, type = "Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) 
procJobGroup = JobGroup(subscription = self.mainProcSub) procJobGroup.create() self.procJobA = Job(name = "ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name = "ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name = "ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset = unmergedOutputFileset, workflow = mainProcMergeWorkflow, type = "Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription = self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name = "MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name = "MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name = "MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset, workflow = mainCleanupWorkflow, type = "Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name = "CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name = "CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name = "CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" 
self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = 'Steve' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs = jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec = "spec2", owner = "Steve", name = "Bogus", task = "Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset = bogusFileset, workflow = bogusWorkflow, type = "Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = 
self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config = config) # Create nine jobs self.setupForKillTest(baAPI = baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. 
""" testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job = 1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1, max_merge_size = 2, max_merge_events = 3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBase", files_per_job = 50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = 
mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1, include_parents = True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) return testWorkload def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number 
of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") 
self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset = 
unmergedProcOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir) testResubmitWMBSHelper.createSubscription() mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/CleanupTask") cleanupWorkflow.load() 
self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, 
"/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() def createWMSpec(self, name = 'ReRecoWorkload'): wmspec = rerecoWorkload(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") return wmspec def createMCWMSpec(self, name = 'MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents = 10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask = None): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, block, mask, cachepath = self.workDir) if block: block = self.dbs.getFileBlock(block)[block] 
wmbs.createSubscriptionAndAddFiles(block = block) return wmbs # def testProduction(self): # """Production workflow""" # pass def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testParentage(self): """ TODO: add the parentage test. 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. 
When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ pass def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname = "Locations.New") ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName = site, seName = self.siteDB.cmsNametoSE(site)) ses.append(self.siteDB.cmsNametoSE(site)) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class PileupFetcherTest(unittest.TestCase):
    """
    Tests for the PileupFetcher: build an MC workload that carries a pileup
    configuration, run the fetcher over each task, and compare the generated
    pileupconf.json files against what DBS (emulated) reports.
    """

    def setUp(self):
        """
        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("pileupfetcher_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("pileupfetcher_t")
        self.testDir = self.testInit.generateWorkDir()
        # DBS calls are served by the emulator for the duration of the test.
        EmulatorHelper.setEmulators(dbs=True)

    def tearDown(self):
        """
        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        EmulatorHelper.resetEmulators()

    def injectGenerationConfig(self):
        """
        _injectGenerationConfig_

        Inject a generation config for the MC workflow.

        Returns the id of the committed config-cache document.
        """
        config = Document()
        config["info"] = None
        config["config"] = None
        config["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        config["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        config["owner"] = {"group": "cmsdataops", "user": "******"}
        # NOTE(review): this first assignment is immediately overwritten below.
        config["pset_tweak_details"] = None
        config["pset_tweak_details"] = \
            {"process": {"outputModules_": ["OutputA"],
                         "OutputA": {"dataset": {"filterName": "OutputAFilter",
                                                 "dataTier": "GEN-SIM-RAW"}}}}
        result = self.configDatabase.commitOne(config)
        return result[0]["id"]

    def _queryAndCompareWithDBS(self, pileupDict, defaultArguments, dbsUrl):
        """
        pileupDict is a Python dictionary containing particular pileup
        configuration information. Query DBS on given dataset contained
        now in both input defaultArguments as well as in the pileupDict
        and compare values.
        """
        args = {}
        args["version"] = "DBS_2_0_9"
        args["mode"] = "GET"
        reader = DBSReader(dbsUrl, **args)

        inputArgs = defaultArguments["PileupConfig"]

        # Every pileup type requested must be present in the produced
        # configuration (and only those).
        self.assertEqual(len(inputArgs), len(pileupDict),
                         "Number of pileup types different.")
        for pileupType in inputArgs:
            m = ("pileup type '%s' not in PileupFetcher-produced pileup "
                 "configuration: '%s'" % (pileupType, pileupDict))
            self.assertTrue(pileupType in pileupDict, m)

        # now query DBS for compare actual results on files lists for each
        # pileup type and dataset and location (storage element names)
        # pileupDict is saved in the file and now comparing items of this
        # configuration with actual DBS results, the structure of pileupDict:
        # {"pileupTypeA": {"BlockA": {"FileList": [], "StorageElementNames": []},
        #                  "BlockB": {"FileList": [], "StorageElementName": []}, ....}
        for pileupType, datasets in inputArgs.items():
            # this is from the pileup configuration produced by PileupFetcher
            blockDict = pileupDict[pileupType]

            for dataset in datasets:
                dbsFileBlocks = reader.listFileBlocks(dataset=dataset)
                for dbsFileBlockName in dbsFileBlocks:
                    fileList = []  # list of files in the block (dbsFile["LogicalFileName"])
                    storageElemNames = set()  # list of StorageElementName
                    # each DBS block has a list under 'StorageElementList', iterate over
                    storageElements = reader.listFileBlockLocation(dbsFileBlockName)
                    for storElem in storageElements:
                        storageElemNames.add(storElem)
                    # now get list of files in the block
                    dbsFiles = reader.listFilesInBlock(dbsFileBlockName)
                    for dbsFile in dbsFiles:
                        fileList.append(dbsFile["LogicalFileName"])
                    # now compare the sets:
                    m = ("StorageElementNames don't agree for pileup type '%s', "
                         "dataset '%s' in configuration: '%s'" %
                         (pileupType, dataset, pileupDict))
                    self.assertEqual(set(blockDict[dbsFileBlockName]["StorageElementNames"]),
                                     storageElemNames, m)
                    m = ("FileList don't agree for pileup type '%s', dataset '%s' "
                         " in configuration: '%s'" % (pileupType, dataset, pileupDict))
                    print(fileList)
                    print(blockDict[dbsFileBlockName]["FileList"])
                    self.assertEqual(sorted(blockDict[dbsFileBlockName]["FileList"]),
                                     sorted(fileList))

    def _queryPileUpConfigFile(self, defaultArguments, task, taskPath):
        """
        Query and compare contents of the the pileup JSON
        configuration files. Iterate over tasks's steps as
        it happens in the PileupFetcher.
        """
        for step in task.steps().nodeIterator():
            helper = WMStep.WMStepHelper(step)
            # returns e.g. instance of CMSSWHelper
            if hasattr(helper.data, "pileup"):
                decoder = JSONDecoder()

                stepPath = "%s/%s" % (taskPath, helper.name())
                pileupConfig = "%s/%s" % (stepPath, "pileupconf.json")
                try:
                    f = open(pileupConfig, 'r')
                    json = f.read()
                    pileupDict = decoder.decode(json)
                    f.close()
                except IOError:
                    # A missing/unreadable config file fails the test outright.
                    m = "Could not read pileup JSON configuration file: '%s'" % pileupConfig
                    self.fail(m)
                self._queryAndCompareWithDBS(pileupDict, defaultArguments,
                                             helper.data.dbsUrl)

    def testPileupFetcherOnMC(self):
        """Run the fetcher over a generated MC workload and verify output."""
        pileupMcArgs = MonteCarloWorkloadFactory.getTestArguments()
        pileupMcArgs["PileupConfig"] = {
            "cosmics": [
                "/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"
            ],
            "minbias": [
                "/Mu/PenguinsPenguinsEverywhere-SingleMu-HorriblyJaundicedYellowEyedPenginsSearchingForCarrots-v31/RECO"
            ]
        }
        pileupMcArgs["CouchURL"] = os.environ["COUCHURL"]
        pileupMcArgs["CouchDBName"] = "pileupfetcher_t"
        pileupMcArgs["ConfigCacheID"] = self.injectGenerationConfig()

        factory = MonteCarloWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", pileupMcArgs)

        # Since this is test of the fetcher - The loading from WMBS isn't
        # really necessary because the fetching happens before the workflow
        # is inserted into WMBS: feed the workload instance directly into fetcher:
        fetcher = PileupFetcher()
        creator = SandboxCreator()
        pathBase = "%s/%s" % (self.testDir, testWorkload.name())
        for topLevelTask in testWorkload.taskIterator():
            for taskNode in topLevelTask.nodeIterator():
                # this is how the call to PileupFetcher is happening
                # from the SandboxCreator test
                task = WMTask.WMTaskHelper(taskNode)
                taskPath = "%s/WMSandbox/%s" % (pathBase, task.name())
                fetcher.setWorkingDirectory(taskPath)
                # create Sandbox for the fetcher ...
                creator._makePathonPackage(taskPath)
                fetcher(task)
                self._queryPileUpConfigFile(pileupMcArgs, task, taskPath)
class MonteCarloFromGENTest(EmulatedUnitTestCase): def setUp(self): """ _setUp_ Initialize the database. """ super(MonteCarloFromGENTest, self).setUp() self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("mclhe_t", "ConfigCache") self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) couchServer = CouchServer(os.environ["COUCHURL"]) self.configDatabase = couchServer.connectDatabase("mclhe_t") self.testDir = self.testInit.generateWorkDir() return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() super(MonteCarloFromGENTest, self).tearDown() return def injectConfig(self): """ _injectConfig_ Create a bogus config cache document and inject it into couch. Return the ID of the document. """ newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7" newConfig["owner"] = {"group": "cmsdataops", "user": "******"} newConfig["pset_tweak_details"] = {"process": {"outputModules_": ["outputRECORECO", "outputALCARECOALCARECO"], "outputRECORECO": {"dataset": {"filterName": "FilterRECO", "dataTier": "RECO"}}, "outputALCARECOALCARECO": { "dataset": {"filterName": "FilterALCARECO", "dataTier": "ALCARECO"}}}} result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def testMonteCarloFromGEN(self): """ _testMonteCarloFromGEN_ Create a MonteCarloFromGEN workflow and verify it installs into WMBS correctly. 
""" arguments = MonteCarloFromGENWorkloadFactory.getTestArguments() arguments["ConfigCacheID"] = self.injectConfig() arguments["CouchDBName"] = "mclhe_t" arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias" factory = MonteCarloFromGENWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments) outputDatasets = testWorkload.listOutputDatasets() self.assertEqual(len(outputDatasets), 2) self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets) self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets) productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN') splitting = productionTask.jobSplittingParameters() self.assertFalse(splitting["deterministicPileup"]) testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN") procWorkflow.load() self.assertEqual(len(procWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") self.assertEqual(procWorkflow.wfType, 'production') goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"] for goldenOutputMod in goldenOutputMods: mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = 
procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = 
Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(procSubscription["type"], "Production", "Error: Wrong subscription type: %s" % procSubscription["type"]) self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algo.") unmergedReco = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO") unmergedReco.loadData() recoMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO") recoMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) unmergedAlca = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO") unmergedAlca.loadData() alcaMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO") alcaMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedAlca, workflow=alcaMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]: unmerged = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput) unmerged.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput) cleanupWorkflow.load() cleanupSubscription = 
Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") procLogCollect = Fileset( name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], 
                         "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return

    def testMCFromGENWithPileup(self):
        """
        _testMCFromGENWithPileup_

        Create a MonteCarloFromGEN workload with MC and data pileup datasets
        and verify that the pileup configuration reaches the cmsRun1 step and
        that deterministic pileup is recorded in the splitting parameters.
        """
        arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        # Add pileup inputs
        arguments["MCPileup"] = "/HighPileUp/Run2011A-v1/RAW"
        arguments["DataPileup"] = "/Cosmics/ComissioningHI-v1/RAW"
        arguments["DeterministicPileup"] = True

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

        # The pileup configuration lives on the cmsRun1 step of the top-level task.
        productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
        cmsRunStep = productionTask.getStep("cmsRun1").getTypeHelper()
        pileupData = cmsRunStep.getPileup()
        self.assertEqual(pileupData.mc.dataset, [arguments["MCPileup"]])
        self.assertEqual(pileupData.data.dataset, [arguments["DataPileup"]])

        splitting = productionTask.jobSplittingParameters()
        self.assertTrue(splitting["deterministicPileup"])

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory settings are properly propagated
        to all tasks and steps.
        """
        defaultArguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        defaultArguments["ConfigCacheID"] = self.injectConfig()
        defaultArguments["CouchDBName"] = "mclhe_t"
        defaultArguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)

        # test default values: single core, no streams on every step
        taskObj = testWorkload.getTask('MonteCarloFromGEN')
        for step in ('cmsRun1', 'stageOut1', 'logArch1'):
            stepHelper = taskObj.getStepHelper(step)
            self.assertEqual(stepHelper.getNumberOfCores(), 1)
            self.assertEqual(stepHelper.getNumberOfStreams(), 0)
        # then test Memory requirements (default is 2300 MB)
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided: only cmsRun1 should pick up
        # the multicore/streams settings, the auxiliary steps stay single-core
        defaultArguments["Multicore"] = 6
        defaultArguments["Memory"] = 4600.0
        defaultArguments["EventStreams"] = 3
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments)
        taskObj = testWorkload.getTask('MonteCarloFromGEN')
        for step in ('cmsRun1', 'stageOut1', 'logArch1'):
            stepHelper = taskObj.getStepHelper(step)
            if step == 'cmsRun1':
                self.assertEqual(stepHelper.getNumberOfCores(), defaultArguments["Multicore"])
                self.assertEqual(stepHelper.getNumberOfStreams(), defaultArguments["EventStreams"])
            else:
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
        # then test Memory requirements follow the provided argument
        perfParams = taskObj.jobSplittingParameters()['performance']
        self.assertEqual(perfParams['memoryRequirement'], defaultArguments["Memory"])

        return
class JobCreatorTest(unittest.TestCase):
    """
    Test case for the JobCreator component: sets up WMBS/ResourceControl/couch,
    injects dummy workflows and files, and verifies that JobCreatorPoller
    creates the expected jobs and on-disk job cache.
    """
    sites = ['T2_US_Florida', 'T2_US_UCSD', 'T2_TW_Taiwan', 'T1_CH_CERN']

    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection. Try to create all of the
        WMBS tables. Also, create some dummy locations.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=['WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'],
                                useDefault=False)
        self.couchdbname = "jobcreator_t"
        self.testInit.setupCouch("%s/jobs" % self.couchdbname, "JobDump")
        self.testInit.setupCouch("%s/fwjrs" % self.couchdbname, "FWJRDump")
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        locationAction = self.daoFactory(classname="Locations.New")
        for site in self.sites:
            locationAction.execute(siteName=site, pnn=site)

        # Create sites in resourceControl
        resourceControl = ResourceControl()
        for site in self.sites:
            resourceControl.insertSite(siteName=site, pnn=site, ceName=site)
            resourceControl.insertThreshold(siteName=site, taskType='Processing',
                                            maxSlots=10000, pendingSlots=10000)

        self.resourceControl = resourceControl

        self._setup = True
        self._teardown = False

        self.testDir = self.testInit.generateWorkDir()
        self.cwd = os.getcwd()

        # Set heartbeat
        self.componentName = 'JobCreator'
        self.heartbeatAPI = HeartbeatAPI(self.componentName)
        self.heartbeatAPI.registerComponent()

        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.clearDatabase(modules=['WMCore.WMBS', 'WMCore.ResourceControl', 'WMCore.Agent.Database'])
        self.testInit.delWorkDir()
        self._teardown = True
        self.testInit.tearDownCouch()
        EmulatorSetup.deleteConfig(self.configFile)
        return

    def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
        """
        _createJobCollection_

        Create a collection of jobs: one workflow, nSubs filesets/subscriptions,
        each fileset holding nFiles files placed at random sites.
        """
        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                name=name, task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):
            nameStr = '%s-%i' % (name, sub)

            # each fileset + its files + subscription commit in one transaction
            myThread.transaction.begin()

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # pick a random site
                site = random.choice(self.sites)
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f), size=1024, events=10)
                testFile.setLocation(site)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            myThread.transaction.commit()

        return

    def createWorkload(self, workloadName='Test', emulator=True, priority=1):
        """
        _createTestWorkload_

        Creates a test workload for us to run on, hold the basic necessities.
        The PresetSeeder generator is attached so that job baggage seeds can
        be asserted later.
        """
        workload = testWorkload("Tier1ReReco")
        rereco = workload.getTask("ReReco")
        seederDict = {"generator.initialSeed": 1001, "evtgenproducer.initialSeed": 1001}
        rereco.addGenerator("PresetSeeder", **seederDict)

        taskMaker = TaskMaker(workload, os.path.join(self.testDir, 'workloadTest'))
        taskMaker.skipSubscription = True
        taskMaker.processWorkload()

        return workload

    def getConfig(self):
        """
        _getConfig_

        Creates a common config.
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        # First the general stuff
        config.section_("General")
        config.General.workDir = os.getenv("TESTDIR", os.getcwd())
        config.section_("Agent")
        config.Agent.componentName = self.componentName

        # Now the CoreDatabase information
        # This should be the dialect, dburl, etc
        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        config.component_("JobCreator")
        config.JobCreator.namespace = 'WMComponent.JobCreator.JobCreator'
        # The log level of the component.
        # config.JobCreator.logLevel = 'SQLDEBUG'
        config.JobCreator.logLevel = 'INFO'
        # maximum number of threads we want to deal
        # with messages per pool.
        config.JobCreator.maxThreads = 1
        config.JobCreator.UpdateFromResourceControl = True
        config.JobCreator.pollInterval = 10
        # config.JobCreator.jobCacheDir = self.testDir
        config.JobCreator.defaultJobType = 'processing'  # Type of jobs that we run, used for resource control
        config.JobCreator.workerThreads = 4
        config.JobCreator.componentDir = self.testDir
        config.JobCreator.useWorkQueue = True
        config.JobCreator.WorkQueueParams = {'emulateDBSReader': True}

        # We now call the JobMaker from here
        config.component_('JobMaker')
        config.JobMaker.logLevel = 'INFO'
        config.JobMaker.namespace = 'WMCore.WMSpec.Makers.JobMaker'
        config.JobMaker.maxThreads = 1
        config.JobMaker.makeJobsHandler = 'WMCore.WMSpec.Makers.Handlers.MakeJobs'

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', 'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = self.couchdbname

        return config

    def testVerySimpleTest(self):
        """
        _VerySimpleTest_

        Just test that everything works...more or less
        """
        # return
        myThread = threading.currentThread()
        config = self.getConfig()
        name = makeUUID()
        nSubs = 5
        nFiles = 10
        workloadName = 'TestWorkload'

        dummyWorkload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), nSubs * nFiles)

        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'jobCacheDir', 'TestWorkload', 'ReReco')
        # It should have at least one jobGroup
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        # But no more than twenty
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        listOfDirs = []
        for tmpDirectory in os.listdir(testDirectory):
            listOfDirs.extend(os.listdir(os.path.join(testDirectory, tmpDirectory)))
        self.assertTrue('job_1' in listOfDirs)
        self.assertTrue('job_2' in listOfDirs)
        self.assertTrue('job_3' in listOfDirs)

        jobDir = os.listdir(groupDirectory)[0]
        jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        # pickle files must be read in binary mode; the context manager also
        # guarantees the handle is closed if an assertion below raises
        with open(jobFile, 'rb') as f:
            job = pickle.load(f)

        self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
        self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed, 1001)

        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')

        return

    @attr('performance', 'integration')
    def testProfilePoller(self):
        """
        Profile your performance
        You shouldn't be running this normally because it doesn't do anything
        """
        myThread = threading.currentThread()
        name = makeUUID()
        nSubs = 5
        nFiles = 1500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        testJobCreator = JobCreatorPoller(config=config)
        cProfile.runctx("testJobCreator.algorithm()", globals(), locals(), filename="testStats.stat")

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        time.sleep(10)

        self.assertEqual(len(result), nSubs * nFiles)

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    @attr('integration')
    def testProfileWorker(self):
        """
        Profile where the work actually gets done
        You shouldn't be running this one either, since it doesn't test anything.
        """
        myThread = threading.currentThread()
        name = makeUUID()
        nSubs = 5
        nFiles = 500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        # NOTE(review): config.JobCreator.jobCacheDir is commented out in
        # getConfig(), so this lookup presumably relies on a default set
        # elsewhere — confirm before running this integration test.
        configDict = {"couchURL": config.JobStateMachine.couchurl,
                      "couchDBName": config.JobStateMachine.couchDBName,
                      'jobCacheDir': config.JobCreator.jobCacheDir,
                      'defaultJobType': config.JobCreator.defaultJobType}

        subs = [{"subscription": 1}, {"subscription": 2}, {"subscription": 3}, {"subscription": 4},
                {"subscription": 5}]

        testJobCreator = JobCreatorPoller(**configDict)
        # NOTE(review): 'input' here resolves to the builtin, not a local —
        # 'subs' above is built but never passed; looks like the intent was
        # parameters=subs. Left unchanged because this profiling test is not
        # run normally; verify before relying on it.
        cProfile.runctx("testJobCreator.algorithm(parameters = input)", globals(), locals(), filename="workStats.stat")

        p = pstats.Stats('workStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return

    @attr('integration')
    def testHugeTest(self):
        """
        Don't run this one either
        """
        myThread = threading.currentThread()
        config = self.getConfig()
        name = makeUUID()
        nSubs = 10
        nFiles = 5000
        workloadName = 'Tier1ReReco'

        dummyWorkload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        startTime = time.time()
        testJobCreator.algorithm()
        stopTime = time.time()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), nSubs * nFiles)

        print("Job took %f seconds to run" % (stopTime - startTime))

        # Count database objects
        result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        return

    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation. Three completed job groups each
        containing several files are injected. Another incomplete job group is
        also injected. Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", pnn="somese.cern.ch")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                 name=name, task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        # NOTE(review): this subscription is never create()d, so it is not
        # persisted to WMBS — confirm whether that is intentional (the name
        # suggests it is a decoy).
        dummySubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")

        # Run 1: four contiguous 1024-event files
        file1 = File(lfn="file1", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4", size=1024, events=1024, first_event=3072, locations={"somese.cern.ch"})
        file4.addRun(Run(1, *[45]))
        file4.create()

        # Run 1, lumi 46: three more files
        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        # Run 2: three small files plus one oversized file (fileIV)
        fileI = File(lfn="fileI", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV", size=1024 * 1000000, events=1024, first_event=3072, locations={"somese.cern.ch"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV]:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return

    def testTestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Test and see if we can split things without a proxy.
        """
        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name = makeUUID()
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.stuffWMBS(workflowURL=workloadPath, name=name)

        testJobCreator = JobCreatorPoller(config=config)

        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), 1)

        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)

        return
class TaskChainTests(unittest.TestCase):
    """
    Tests for the TaskChain workload factory: builds a six-task generator
    chain and verifies task structure and output datasets.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("taskchain_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("taskchain_t")
        self.testInit.generateWorkDir()
        self.workload = None
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testGeneratorWorkflow(self):
        """
        _testGeneratorWorkflow_
        Test creating a request with an initial generator task
        it mocks a request where there are 2 similar paths starting
        from the generator, each one with a different PrimaryDataset, CMSSW configuration
        and processed dataset. Dropping the RAW output as well.
        Also include an ignored output module to keep things interesting...
        """
        generatorDoc = makeGeneratorConfig(self.configDatabase)
        processorDocs = makeProcessingConfigs(self.configDatabase)

        testArguments = TaskChainWorkloadFactory.getTestArguments()
        arguments = {
            "AcquisitionEra": "ReleaseValidation",
            "Requestor": "*****@*****.**",
            "CMSSWVersion": "CMSSW_3_5_8",
            "ScramArch": "slc5_ia32_gcc434",
            "ProcessingVersion": 1,
            "GlobalTag": "GR10_P_v4::All",
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
            "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
            "DashboardHost": "127.0.0.1",
            "DashboardPort": 8884,
            "TaskChain": 6,
            "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
            "Task1": {
                "TaskName": "GenSim",
                "ConfigCacheID": generatorDoc,
                "SplittingAlgo": "EventBased",
                "RequestNumEvents": 10000,
                "Seeding": "AutomaticSeeding",
                "PrimaryDataset": "RelValTTBar",
            },
            "Task2": {
                "TaskName": "DigiHLT_new",
                "InputTask": "GenSim",
                "InputFromOutputModule": "writeGENSIM",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "LumiBased",
                "CMSSWVersion": "CMSSW_5_2_6",
                "GlobalTag": "GR_39_P_V5:All",
                "PrimaryDataset": "PURelValTTBar",
                "KeepOutput": False
            },
            "Task3": {
                "TaskName": "DigiHLT_ref",
                "InputTask": "GenSim",
                "InputFromOutputModule": "writeGENSIM",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "EventBased",
                "CMSSWVersion": "CMSSW_5_2_7",
                "GlobalTag": "GR_40_P_V5:All",
                "AcquisitionEra": "ReleaseValidationNewConditions",
                "ProcessingVersion": 3,
                "ProcessingString": "Test",
                "KeepOutput": False
            },
            "Task4": {
                "TaskName": "Reco",
                "InputTask": "DigiHLT_new",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['Reco'],
                "SplittingAlgo": "FileBased",
                "TransientOutputModules": ["writeRECO"]
            },
            "Task5": {
                "TaskName": "ALCAReco",
                "InputTask": "DigiHLT_ref",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['ALCAReco'],
                "SplittingAlgo": "LumiBased",
            },
            "Task6": {
                "TaskName": "Skims",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeRECO",
                "ConfigCacheID": processorDocs['Skims'],
                "SplittingAlgo": "LumiBased",
            }
        }
        testArguments.update(arguments)
        arguments = testArguments

        # print() form keeps this valid on both Python 2 and Python 3,
        # consistent with the other tests in this file
        print(arguments)

        factory = TaskChainWorkloadFactory()

        # Test a malformed task chain definition
        arguments['Task4']['TransientOutputModules'].append('writeAOD')
        self.assertRaises(WMSpecFactoryException, factory.validateSchema, arguments)

        arguments['Task4']['TransientOutputModules'].remove('writeAOD')
        try:
            self.workload = factory.factoryWorkloadConstruction("PullingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            import traceback
            traceback.print_exc()
            self.fail(msg)

        print(self.workload.data)

        testWMBSHelper = WMBSHelper(self.workload, "GenSim", "SomeBlock", cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        firstTask = self.workload.getTaskByPath("/PullingTheChain/GenSim")

        self._checkTask(firstTask, arguments['Task1'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new"),
                        arguments['Task2'], arguments)
        self._checkTask(self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref"),
                        arguments['Task3'], arguments)
        self._checkTask(
            self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco"),
            arguments['Task4'], arguments)
        self._checkTask(
            self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref/ALCAReco"),
            arguments['Task5'], arguments)
        self._checkTask(
            self.workload.getTaskByPath("/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco/Skims"),
            arguments['Task6'], arguments)

        # Verify the output datasets
        outputDatasets = self.workload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 11, "Number of output datasets doesn't match")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM not in output datasets")
        self.assertFalse(
            "/RelValTTBar/ReleaseValidation-reco-v1/RECO" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-reco-v1/RECO in output datasets")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-AOD-v1/AOD not in output datasets")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO not in output datasets")
        for i in range(1, 5):
            self.assertTrue(
                "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO" % i in outputDatasets,
                "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets" % i)
        for i in range(1, 6):
            if i == 2:
                continue
            self.assertTrue(
                "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD" % i in outputDatasets,
                "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets" % i)
        return
class MonteCarloTest(EmulatedUnitTestCase): def setUp(self): """ _setUp_ Initialize the database and couch. """ super(MonteCarloTest, self).setUp() self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch(TEST_DB_NAME, "ConfigCache") self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) self.testInit.generateWorkDir() couchServer = CouchServer(os.environ["COUCHURL"]) self.configDatabase = couchServer.connectDatabase(TEST_DB_NAME) myThread = threading.currentThread() self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName") self.listFilesets = self.daoFactory(classname="Fileset.List") self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow") return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.tearDownCouch() self.testInit.clearDatabase() self.testInit.delWorkDir() super(MonteCarloTest, self).tearDown() return def injectMonteCarloConfig(self): """ _injectMonteCarlo_ Create a bogus config cache document for the montecarlo generation and inject it into couch. Return the ID of the document. """ newConfig = Document() newConfig["info"] = None newConfig["config"] = None newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f" newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7" newConfig["owner"] = {"group": "cmsdataops", "user": "******"} newConfig["pset_tweak_details"] = {"process": {"outputModules_": ["OutputA", "OutputB"], "OutputA": {"dataset": {"filterName": "OutputAFilter", "dataTier": "RECO"}}, "OutputB": {"dataset": {"filterName": "OutputBFilter", "dataTier": "USER"}}}} result = self.configDatabase.commitOne(newConfig) return result[0]["id"] def _commonMonteCarloTest(self): """ Retrieve the workload from WMBS and test all its properties. 
""" goldenOutputMods = {"OutputA": "RECO", "OutputB": "USER"} prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production") prodWorkflow.load() self.assertEqual(len(prodWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier mergedOutput = prodWorkflow.outputMap[fset][0]["merged_output_fileset"] unmergedOutput = prodWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Production/unmerged-%s" % (goldenOutputMod + tier), "Error: Unmerged output fileset is wrong.") logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod, tier in goldenOutputMods.items(): mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") from pprint import pformat print(pformat(mergeWorkflow.outputMap)) mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, 
"/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock") topLevelFileset.loadData() prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow) prodSubscription.loadData() self.assertEqual(prodSubscription["type"], "Production", "Error: Wrong subscription type.") self.assertEqual(prodSubscription["split_algo"], "EventBased", "Error: Wrong split algo.") for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset) unmergedOutput.loadData() mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod) mergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) for goldenOutputMod, tier in goldenOutputMods.items(): fset = 
goldenOutputMod + tier unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset) unmerged.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") for goldenOutputMod in goldenOutputMods: mergeLogCollect = Fileset( name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod) mergeLogCollect.loadData() mergeLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % ( goldenOutputMod, goldenOutputMod)) mergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") def testMonteCarlo(self): """ _testMonteCarlo_ Create a Monte Carlo workflow and verify that it is injected correctly into WMBS and invoke its detailed test. 
""" defaultArguments = MonteCarloWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = TEST_DB_NAME defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig() factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._commonMonteCarloTest() return def testMonteCarloExtension(self): """ _testMonteCarloExtension_ Create a Monte Carlo workflow and verify that it is injected correctly into WMBS and invoke its detailed test. This uses a non-zero first lumi. Check that the splitting arguments are correctly set for the lfn counter. """ defaultArguments = MonteCarloWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = TEST_DB_NAME defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig() defaultArguments["FirstLumi"] = 10001 defaultArguments["EventsPerJob"] = 100 defaultArguments["FirstEvent"] = 10001 # defaultArguments["FirstEvent"] = 10001 initial_lfn_counter = 100 # EventsPerJob == EventsPerLumi, then the number of previous jobs is equal to the number of the initial lumi factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._commonMonteCarloTest() productionTask = testWorkload.getTaskByPath('/TestWorkload/Production') productionSplitting = productionTask.jobSplittingParameters() 
self.assertTrue("initial_lfn_counter" in productionSplitting, "No initial lfn counter was stored") self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter, "Wrong initial LFN counter") for outputMod in ["OutputA", "OutputB"]: mergeTask = testWorkload.getTaskByPath('/TestWorkload/Production/ProductionMerge%s' % outputMod) mergeSplitting = mergeTask.jobSplittingParameters() self.assertTrue("initial_lfn_counter" in mergeSplitting, "No initial lfn counter was stored") self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter, "Wrong initial LFN counter") return def testMCWithPileup(self): """ _testMCWithPileup_ Create a Monte Carlo workflow and verify that it is injected correctly into WMBS and invoke its detailed test. The input configuration includes pileup input files. """ defaultArguments = MonteCarloWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = TEST_DB_NAME defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig() # Add pileup inputs defaultArguments["MCPileup"] = COSMICS_PU defaultArguments["DataPileup"] = DATA_PU defaultArguments["DeterministicPileup"] = True factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._commonMonteCarloTest() productionTask = testWorkload.getTaskByPath('/TestWorkload/Production') cmsRunStep = productionTask.getStep("cmsRun1").getTypeHelper() pileupData = cmsRunStep.getPileup() self.assertEqual(pileupData.data.dataset, [DATA_PU]) self.assertEqual(pileupData.mc.dataset, [COSMICS_PU]) splitting = productionTask.jobSplittingParameters() self.assertTrue(splitting["deterministicPileup"]) return def 
testMCWithLHE(self): """ _testMCWithLHE_ Create a MonteCarlo workflow with a variation on the type of work done, this refers to the previous LHEStepZero where the input can be .lhe files and there is more than one lumi per job. """ defaultArguments = MonteCarloWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = TEST_DB_NAME defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig() defaultArguments["LheInputFiles"] = "True" defaultArguments["EventsPerJob"] = 200 defaultArguments["EventsPerLumi"] = 50 factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self._commonMonteCarloTest() productionTask = testWorkload.getTaskByPath('/TestWorkload/Production') splitting = productionTask.jobSplittingParameters() self.assertEqual(splitting["events_per_job"], 200) self.assertEqual(splitting["events_per_lumi"], 50) self.assertEqual(splitting["lheInputFiles"], True) self.assertFalse(splitting["deterministicPileup"]) return def testMemCoresSettings(self): """ _testMemCoresSettings_ Make sure the multicore and memory setings are properly propagated to all tasks and steps. 
""" defaultArguments = MonteCarloWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = TEST_DB_NAME defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig() factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) # test default values taskObj = testWorkload.getTask('Production') for step in ('cmsRun1', 'stageOut1', 'logArch1'): stepHelper = taskObj.getStepHelper(step) self.assertEqual(stepHelper.getNumberOfCores(), 1) self.assertEqual(stepHelper.getNumberOfStreams(), 0) # then test Memory requirements perfParams = taskObj.jobSplittingParameters()['performance'] self.assertEqual(perfParams['memoryRequirement'], 2300.0) # now test case where args are provided defaultArguments["Multicore"] = 6 defaultArguments["Memory"] = 4600.0 defaultArguments["EventStreams"] = 3 testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) taskObj = testWorkload.getTask('Production') for step in ('cmsRun1', 'stageOut1', 'logArch1'): stepHelper = taskObj.getStepHelper(step) if step == 'cmsRun1': self.assertEqual(stepHelper.getNumberOfCores(), defaultArguments["Multicore"]) self.assertEqual(stepHelper.getNumberOfStreams(), defaultArguments["EventStreams"]) else: self.assertEqual(stepHelper.getNumberOfCores(), 1) self.assertEqual(stepHelper.getNumberOfStreams(), 0) # then test Memory requirements perfParams = taskObj.jobSplittingParameters()['performance'] self.assertEqual(perfParams['memoryRequirement'], defaultArguments["Memory"]) return def testFilesets(self): """ Test workflow tasks, filesets and subscriptions creation """ # expected tasks, filesets, subscriptions, etc expOutTasks = ['/TestWorkload/Production', '/TestWorkload/Production/ProductionMergeOutputB', '/TestWorkload/Production/ProductionMergeOutputA'] expWfTasks = ['/TestWorkload/Production', '/TestWorkload/Production/LogCollect', 
'/TestWorkload/Production/ProductionCleanupUnmergedOutputA', '/TestWorkload/Production/ProductionCleanupUnmergedOutputB', '/TestWorkload/Production/ProductionMergeOutputA', '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect', '/TestWorkload/Production/ProductionMergeOutputB', '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect'] expFsets = ['FILESET_DEFINED_DURING_RUNTIME', '/TestWorkload/Production/unmerged-OutputBUSER', '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive', '/TestWorkload/Production/ProductionMergeOutputA/merged-MergedRECO', '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive', '/TestWorkload/Production/ProductionMergeOutputB/merged-MergedUSER', '/TestWorkload/Production/unmerged-logArchive', '/TestWorkload/Production/unmerged-OutputARECO'] subMaps = ['FILESET_DEFINED_DURING_RUNTIME', (6, '/TestWorkload/Production/ProductionMergeOutputA/merged-logArchive', '/TestWorkload/Production/ProductionMergeOutputA/ProductionOutputAMergeLogCollect', 'MinFileBased', 'LogCollect'), (3, '/TestWorkload/Production/ProductionMergeOutputB/merged-logArchive', '/TestWorkload/Production/ProductionMergeOutputB/ProductionOutputBMergeLogCollect', 'MinFileBased', 'LogCollect'), (8, '/TestWorkload/Production/unmerged-logArchive', '/TestWorkload/Production/LogCollect', 'MinFileBased', 'LogCollect'), (7, '/TestWorkload/Production/unmerged-OutputARECO', '/TestWorkload/Production/ProductionCleanupUnmergedOutputA', 'SiblingProcessingBased', 'Cleanup'), (5, '/TestWorkload/Production/unmerged-OutputARECO', '/TestWorkload/Production/ProductionMergeOutputA', 'ParentlessMergeBySize', 'Merge'), (4, '/TestWorkload/Production/unmerged-OutputBUSER', '/TestWorkload/Production/ProductionCleanupUnmergedOutputB', 'SiblingProcessingBased', 'Cleanup'), (2, '/TestWorkload/Production/unmerged-OutputBUSER', '/TestWorkload/Production/ProductionMergeOutputB', 'ParentlessMergeBySize', 'Merge')] testArguments 
= MonteCarloWorkloadFactory.getTestArguments() testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["CouchDBName"] = TEST_DB_NAME testArguments["ConfigCacheID"] = self.injectMonteCarloConfig() factory = MonteCarloWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments) myMask = Mask(FirstRun=1, FirstLumi=1, FirstEvent=1, LastRun=1, LastLumi=10, LastEvent=1000) testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask, cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # same function as in WMBSHelper, otherwise we cannot know which fileset name is maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)]) topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest() expFsets[0] = topFilesetName # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() self.assertItemsEqual([item[1] for item in filesets], expFsets) subMaps[0] = (1, topFilesetName, '/TestWorkload/Production', 'EventBased', 'Production') subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) self.assertItemsEqual(subscriptions, subMaps) ### create another top level subscription myMask = Mask(FirstRun=1, FirstLumi=11, FirstEvent=1001, LastRun=1, LastLumi=20, LastEvent=2000) testWMBSHelper = WMBSHelper(testWorkload, "Production", mask=myMask, cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") 
self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # same function as in WMBSHelper, otherwise we cannot know which fileset name is maskString = ",".join(["%s=%s" % (x, myMask[x]) for x in sorted(myMask)]) topFilesetName = 'TestWorkload-Production-%s' % md5(maskString).hexdigest() expFsets.append(topFilesetName) # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() self.assertItemsEqual([item[1] for item in filesets], expFsets) subMaps.append((9, topFilesetName, '/TestWorkload/Production', 'EventBased', 'Production')) subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) self.assertItemsEqual(subscriptions, subMaps)
class JobUpdaterTest(unittest.TestCase):
    """
    _JobUpdaterTest_

    Test class for the JobUpdater
    """

    def setUp(self):
        """
        _setUp_

        Set up test environment
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        # WMBS + BossAir schemas are enough for the workflow-priority checks
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMCore.BossAir"],
                                useDefault = False)
        self.testInit.setupCouch('workqueue_t', 'WorkQueue')
        self.testInit.setupCouch('workqueue_inbox_t', 'WorkQueue')
        self.testDir = self.testInit.generateWorkDir(deleteOnDestruction = False)
        # external services (PhEDEx, DBS, SiteDB, ReqMgr) are emulated
        EmulatorHelper.setEmulators(phedex = True, dbs = True, siteDB = True,
                                    requestMgr = True)
        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = logging,
                                     dbinterface = myThread.dbi)
        self.listWorkflows = self.daoFactory(classname = "Workflow.ListForSubmitter")
        self.configFile = EmulatorSetup.setupWMAgentConfig()

    def tearDown(self):
        """
        _tearDown_

        Tear down the databases
        """
        self.testInit.clearDatabase()
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        EmulatorHelper.resetEmulators()
        EmulatorSetup.deleteConfig(self.configFile)

    def getConfig(self):
        """
        _getConfig_

        Get a test configuration for the JobUpdater tests
        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_('Agent')
        config.Agent.agentName = 'testAgent'

        config.section_('CoreDatabase')
        config.CoreDatabase.connectUrl = os.environ['DATABASE']
        config.CoreDatabase.socket = os.getenv('DBSOCK')

        # JobTracker
        config.component_('JobUpdater')
        config.JobUpdater.reqMgrUrl = 'https://cmsweb-dev.cern.ch/reqmgr/reqMgr'

        # JobStateMachine
        config.section_('JobStateMachine')
        config.JobStateMachine.couchDBName = 'bogus'

        # BossAir
        config.section_('BossAir')
        config.BossAir.pluginNames = ['MockPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'
        config.BossAir.multicoreTaskTypes = ['MultiProcessing', 'MultiProduction']
        config.BossAir.nCondorProcesses = 1
        config.BossAir.section_('MockPlugin')
        config.BossAir.MockPlugin.fakeReport = os.path.join(getTestBase(),
                                                            'WMComponent_t/JobAccountant_t/fwjrs',
                                                            'MergedSkimSuccess.pkl')

        # WorkQueue
        config.component_('WorkQueueManager')
        config.WorkQueueManager.couchurl = os.environ['COUCHURL']
        config.WorkQueueManager.dbname = 'workqueue_t'
        config.WorkQueueManager.inboxDatabase = 'workqueue_inbox_t'

        return config

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Stuff WMBS with workflows
        """
        # single workflow with priority 10, fed by one (empty) fileset
        workflow = Workflow(spec = 'spec.xml', name = 'ReRecoTest_v0Emulator',
                            task = '/ReRecoTest_v0Emulator/Test', priority = 10)
        workflow.create()
        inputFileset = Fileset(name = 'TestFileset')
        inputFileset.create()
        subscription = Subscription(inputFileset, workflow)
        subscription.create()

    def test_BasicTest(self):
        """
        _BasicTest_

        Basic sanity check
        """
        self.stuffWMBS()
        poller = JobUpdaterPoller(self.getConfig())
        # NOTE(review): `self` (the TestCase) is passed as the argument here;
        # presumably the emulated ReqMgr ignores it — confirm against the
        # emulator's getAssignment signature.
        poller.reqmgr.getAssignment(self)
        result = self.listWorkflows.execute()
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['priority'], 10)
        # after one polling cycle the priority should be updated (10 -> 100)
        poller.algorithm()
        result = self.listWorkflows.execute()
        self.assertEqual(result[0]['priority'], 100)
class ServiceTest(unittest.TestCase):
    """Tests for WMCore.Services.Service and its cache helpers."""

    def setUp(self):
        """
        Setup for unit tests
        """
        self.testInit = TestInit(__file__)
        self.testDir = self.testInit.generateWorkDir()
        testname = self.id().split('.')[-1]

        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%m-%d %H:%M',
                            filename='service_unittests.log',
                            filemode='w')

        # one logger per test method, e.g. 'ServiceCachePath' for testCachePath
        logger_name = 'Service%s' % testname.replace('test', '', 1)
        self.logger = logging.getLogger(logger_name)

        # self.cache_path = tempfile.mkdtemp()
        test_dict = {'logger': self.logger,
                     'endpoint': 'https://github.com/dmwm'}
        self.myService = Service(test_dict)

        # second service against a different endpoint, used to check that
        # cache file names differ per host
        test_dict['endpoint'] = 'http://cmssw-test.cvs.cern.ch/cgi-bin/cmssw.cgi'
        self.myService2 = Service(test_dict)
        self.testUrl = 'http://cern.ch'

        # port used by the local cherrypy test servers in the integration tests
        self.port = 8888
        cherrypy.config.update({'server.socket_port': self.port})

    def tearDown(self):
        self.testInit.delWorkDir()
        # There was old code here to see if the test passed and send a message to
        # self.logger.info It broke in 2.7, so if needed find a supported way to do it
        return

    def testIsFile(self):
        """
        Test the `isfile` utilitarian function
        """
        # a real (even closed) file object counts as a file
        f = tempfile.NamedTemporaryFile(prefix="testIsFile", delete=True)
        self.assertTrue(isfile(f))
        f.close()
        self.assertTrue(isfile(f))

        # in-memory file-like objects count as files too
        strio = BytesIO()
        self.assertTrue(isfile(strio))
        strio.close()
        self.assertTrue(isfile(strio))

        # plain strings / ints / None are not files
        self.assertFalse(isfile("/data/srv/alan.txt"))
        self.assertFalse(isfile(1))
        self.assertFalse(isfile(None))

    def testCacheExpired(self):
        """
        Test the `cache_expired` utilitarian function.
        Delta is in hours
        """
        # file-like object is always considered expired
        fcache = tempfile.NamedTemporaryFile(prefix="testIsFile", delete=True)
        self.assertTrue(cache_expired(fcache, delta=0))
        self.assertTrue(cache_expired(fcache, delta=100))
        fcache.close()
        self.assertTrue(cache_expired(fcache, delta=0))
        self.assertTrue(cache_expired(fcache, delta=100))

        # path to a file that does not exist, always expired
        newfile = fcache.name + 'testCacheExpired'
        self.assertTrue(cache_expired(newfile, delta=0))
        self.assertTrue(cache_expired(newfile, delta=100))

        # now create and write something to it
        with open(newfile, 'w') as f:
            f.write("whatever")
        self.assertFalse(cache_expired(newfile, delta=1))
        time.sleep(1)
        # delta=0 means "expire immediately"; delta=1 hour is still fresh
        self.assertTrue(cache_expired(newfile, delta=0))
        self.assertFalse(cache_expired(newfile, delta=1))

    def testClear(self):
        """
        Populate the cache, and then check that it's deleted
        """
        f = self.myService.refreshCache('testClear', '/WMCore/blob/master/setup.py#L11')
        self.assertTrue(os.path.exists(f.name))
        f.close()

        self.myService.clearCache('testClear')
        self.assertFalse(os.path.exists(f.name))

    def testClearAndRepopulate(self):
        """
        Populate the cache, and then check that it's deleted
        """
        f = self.myService.refreshCache('testClear', '/WMCore/blob/master/setup.py#L11')
        self.assertTrue(os.path.exists(f.name))
        f.close()

        self.myService.clearCache('testClear')
        self.assertFalse(os.path.exists(f.name))

        # refreshing again after a clear must recreate the cache file
        f = self.myService.refreshCache('testClear', '/WMCore/blob/master/setup.py#L11')
        self.assertTrue(os.path.exists(f.name))
        f.close()

    def testCachePath(self):
        """An explicit cachepath gets the endpoint hostname appended."""
        cache_path = tempfile.mkdtemp()
        myConfig = {'logger': self.logger,
                    'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                    'cachepath': cache_path,
                    'req_cache_path': '%s/requests' % cache_path}
        service = Service(myConfig)
        # We append hostname to the cachepath, so that we can talk to two
        # services on different hosts
        self.assertEqual(service['cachepath'], '%s/cmssw.cvs.cern.ch' % myConfig['cachepath'])
        shutil.rmtree(cache_path, ignore_errors=True)

    def testCacheLifetime(self):
        """Cache deleted if created by Service - else left alone"""
        myConfig = {'logger': self.logger,
                    'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                    'cacheduration': 24}
        os.environ.pop('TMPDIR', None)  # Mac sets this by default
        service = Service(myConfig)
        cache_path = service['cachepath']
        self.assertTrue(os.path.isdir(cache_path))
        # Service created its own cache dir, so deleting the Service removes it
        del service
        self.assertFalse(os.path.exists(cache_path))

        # a caller-supplied cachepath must survive Service destruction
        cache_path = tempfile.mkdtemp()
        myConfig['cachepath'] = cache_path
        service = Service(myConfig)
        del service
        self.assertTrue(os.path.isdir(cache_path))

    def testCachePermissions(self):
        """Raise error if pre-defined cache permission loose"""
        cache_path = tempfile.mkdtemp()
        sub_cache_path = os.path.join(cache_path, 'cmssw.cvs.cern.ch')
        # world-writable permissions must be rejected by Service
        os.makedirs(sub_cache_path, 0o777)
        myConfig = {'logger': self.logger,
                    'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                    'cacheduration': 100,
                    'cachepath': cache_path}
        self.assertRaises(AssertionError, Service, myConfig)  # it has to be 0o700

    def testCacheDuration(self):
        """A numeric cacheduration is stored as-is."""
        myConfig = {'logger': self.logger,
                    'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                    'cacheduration': 100}
        service = Service(myConfig)
        self.assertEqual(service['cacheduration'], myConfig['cacheduration'])

    def testNoCacheDuration(self):
        """A None cacheduration is accepted and stored as None."""
        myConfig = {'logger': self.logger,
                    'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                    'cacheduration': None,
                    # 'cachepath' : self.cache_path,
                    # 'req_cache_path': '%s/requests' % self.cache_path
                    }
        service = Service(myConfig)
        self.assertEqual(service['cacheduration'], myConfig['cacheduration'])

    def testSocketTimeout(self):
        """The timeout setting is stored and getData still succeeds."""
        myConfig = {'logger': self.logger,
                    'endpoint': 'https://github.com/dmwm',
                    'cacheduration': None,
                    'timeout': 10,
                    }
        service = Service(myConfig)
        service.getData('%s/socketresettest' % self.testDir,
                        '/WMCore/blob/master/setup.py#L11')
        self.assertEqual(service['timeout'], myConfig['timeout'])

    def testStaleCache(self):
        """The usestalecache flag is stored and getData still succeeds."""
        myConfig = {'logger': self.logger,
                    'endpoint': 'https://github.com/dmwm',
                    'usestalecache': True,
                    }
        service = Service(myConfig)
        service.getData('%s/socketresettest' % self.testDir,
                        '/WMCore/blob/master/setup.py#L11')
        self.assertEqual(service['usestalecache'], myConfig['usestalecache'])

    def testUsingStaleCache(self):
        """Walk through the stale-cache state machine against a bad URL."""
        myConfig = {'logger': self.logger,
                    'endpoint': 'https://cmssdt.cern.ch/SDT/',
                    'cacheduration': 0.0005,  # cache file lasts 1.8 secs
                    'timeout': 10,
                    'usestalecache': True,
                    # 'cachepath' : self.cache_path,
                    # 'req_cache_path': '%s/requests' % self.cache_path
                    }
        service = Service(myConfig)
        cache = 'stalecachetest'

        # Start test from a clear cache
        service.clearCache(cache)

        cachefile = service.cacheFileName(cache)

        self.logger.info('1st call to refreshCache - should fail, there is no cache file')
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

        # seed the cache file by hand so the next calls can fall back on it
        cacheddata = 'this data is mouldy'
        with open(cachefile, 'w') as f:
            f.write(cacheddata)

        self.logger.info('2nd call to refreshCache - should pass, data comes from the valid cache')
        data = service.refreshCache(cache, '/lies').read()
        self.assertEqual(cacheddata, data)

        # power nap to avoid letting the cache expire
        time.sleep(1)
        self.logger.info('3rd call to refreshCache - should pass, cache is still valid')
        data = service.refreshCache(cache, '/lies').read()
        self.assertEqual(cacheddata, data)

        # sleep a while longer so the cache dies out
        time.sleep(2)
        self.logger.info('4th call to refreshCache - should fail, cache is dead now')
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

        # touch/renew the file again
        cacheddata = 'foo'
        with open(cachefile, 'w') as f:
            f.write(cacheddata)

        # disable usage of stale cache, so doesn't call the endpoint if cache is valid
        service['usestalecache'] = False
        self.logger.info('5th call to refreshCache - should pass, cache is still valid')
        data = service.refreshCache(cache, '/lies').read()
        self.assertEqual(cacheddata, data)

        # consider the cache dead
        service['cacheduration'] = 0
        time.sleep(1)
        self.logger.info('6th call to refreshCache - should fail, cache is dead now')
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

    def testCacheFileName(self):
        """Hash url + data to get cache file name"""
        hashes = {}
        inputdata = [{}, {'fred': 'fred'},
                     {'fred': 'fred', 'carl': [1, 2]},
                     {'fred': 'fred', 'carl': ["1", "2"]},
                     {'fred': 'fred', 'carl': ["1", "2"], 'jim': {}}]
        for data in inputdata:
            thishash = self.myService.cacheFileName('bob', inputdata=data)
            thishash2 = self.myService2.cacheFileName('bob', inputdata=data)
            # different endpoints must never share a cache file name
            self.assertNotEqual(thishash, thishash2)
            # every (endpoint, inputdata) pair must hash uniquely
            self.assertTrue(thishash not in hashes, '%s is not unique' % thishash)
            self.assertTrue(thishash2 not in hashes,
                            '%s is not unique' % thishash2)
            hashes[thishash], hashes[thishash2] = None, None

    def testNoCache(self):
        """Cache disabled"""
        myConfig = {'logger': self.logger,
                    'endpoint': 'https://github.com/dmwm',
                    'cachepath': None,
                    }
        service = Service(myConfig)

        # all cache paths propagate the disabled (None) setting
        self.assertEqual(service['cachepath'], myConfig['cachepath'])
        self.assertEqual(service['requests']['cachepath'], myConfig['cachepath'])
        self.assertEqual(service['requests']['req_cache_path'], myConfig['cachepath'])

        # refreshCache must hit the endpoint directly when caching is off
        out = service.refreshCache('shouldntbeused', '/').read()
        self.assertTrue('html' in out)

    @attr("integration")
    def notestTruncatedResponse(self):
        """
        _TruncatedResponse_

        """
        cherrypy.tree.mount(CrappyServer())
        cherrypy.engine.start()
        FORMAT = '%(message)s'
        logging.basicConfig(format=FORMAT)
        dummyLogger = logging.getLogger('john')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/truncated' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        self.assertRaises(IncompleteRead, myService.getData, 'foo', '')
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    @attr("integration")
    def notestSlowResponse(self):
        """
        _SlowResponse_

        """
        cherrypy.tree.mount(SlowServer())
        cherrypy.engine.start()
        FORMAT = '%(message)s'
        logging.basicConfig(format=FORMAT)
        dummyLogger = logging.getLogger('john')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/slow' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        startTime = int(time.time())
        self.assertRaises(socket.timeout, myService.getData, 'foo', '')
        # the timeout has to fire well before the server's slow response ends
        self.assertTrue(int(time.time()) - startTime < 130,
                        "Error: Timeout took too long")
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def testBadStatusLine(self):
        """
        _BadStatusLine_

        """
        FORMAT = '%(message)s'
        logging.basicConfig(format=FORMAT)
        dummyLogger = logging.getLogger('john')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/badstatus' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        # Have to fudge the status line in the Request object as cherrypy won't
        # Allow bad statuses to be raised
        myService['requests'] = CrappyRequest('http://bad.com', {})
        self.assertRaises(BadStatusLine, myService.getData, 'foo', '')

    @attr("integration")
    def notestZ_InterruptedConnection(self):
        """
        _InterruptedConnection_

        What happens if we shut down the server while the connection is still active?

        Confirm that the cache works as expected
        """
        cherrypy.tree.mount(RegularServer(), "/reg1")
        cherrypy.engine.start()
        FORMAT = '%(message)s'
        logging.basicConfig(format=FORMAT)
        dummyLogger = logging.getLogger('john')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/reg1/regular' % self.port,
                     'usestalecache': True, "cacheduration": 0.005}
        myService = Service(test_dict)
        self.assertRaises(HTTPException, myService.getData, 'foo', 'THISISABADURL')

        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Now stop the server and confirm that it is down
        cherrypy.server.stop()
        self.assertRaises(socket.error, myService.forceRefresh, 'foo', '')

        # Make sure we can still read from the cache
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Mount a backup server
        del cherrypy.tree.apps['/reg1']
        cherrypy.tree.mount(BackupServer(), "/reg1")

        # Expire cache
        time.sleep(30)
        self.assertRaises(socket.error, myService.forceRefresh, 'foo', '')

        # get expired cache results while the server is down
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Restart server
        cherrypy.server.start()

        # Confirm new server is in place
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is nuts.")
        data.close()

        cherrypy.engine.exit()
        cherrypy.engine.stop()

        return
class TaskChainTests(unittest.TestCase):
    """
    Tests for the TaskChain workload factory: builds multi-task request specs
    against a live couch/WMBS test database and verifies the resulting task
    tree, filesets, subscriptions and output datasets.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("taskchain_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("taskchain_t")
        self.testInit.generateWorkDir()
        self.workload = None
        # JSON request fixture used by the multicore tests
        self.differentNCores = getTestFile(
            'data/ReqMgr/requests/Integration/TaskChain_RelVal_Multicore.json')
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testGeneratorWorkflow(self):
        """
        _testGeneratorWorkflow_

        Test creating a request with an initial generator task.
        It mocks a request where there are 2 similar paths starting from the
        generator, each one with a different PrimaryDataset, CMSSW configuration
        and processed dataset.  Dropping the RAW output as well.
        Also include an ignored output module to keep things interesting...
        """
        generatorDoc = makeGeneratorConfig(self.configDatabase)
        processorDocs = makeProcessingConfigs(self.configDatabase)
        testArguments = TaskChainWorkloadFactory.getTestArguments()
        arguments = {
            "AcquisitionEra": "ReleaseValidation",
            "Requestor": "*****@*****.**",
            "CMSSWVersion": "CMSSW_3_5_8",
            "ScramArch": "slc5_ia32_gcc434",
            "ProcessingVersion": 1,
            "GlobalTag": "GR10_P_v4::All",
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
            "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
            "DashboardHost": "127.0.0.1",
            "DashboardPort": 8884,
            "TaskChain": 6,
            "IgnoredOutputModules": ["writeSkim2", "writeRAWDEBUGDIGI"],
            "Task1": {
                "TaskName": "GenSim",
                "ConfigCacheID": generatorDoc,
                "SplittingAlgo": "EventBased",
                "RequestNumEvents": 10000,
                "Seeding": "AutomaticSeeding",
                "PrimaryDataset": "RelValTTBar",
            },
            "Task2": {
                "TaskName": "DigiHLT_new",
                "InputTask": "GenSim",
                "InputFromOutputModule": "writeGENSIM",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "LumiBased",
                "CMSSWVersion": "CMSSW_5_2_6",
                "GlobalTag": "GR_39_P_V5:All",
                "PrimaryDataset": "PURelValTTBar",
                "KeepOutput": False
            },
            "Task3": {
                "TaskName": "DigiHLT_ref",
                "InputTask": "GenSim",
                "InputFromOutputModule": "writeGENSIM",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "EventBased",
                "CMSSWVersion": "CMSSW_5_2_7",
                "GlobalTag": "GR_40_P_V5:All",
                "AcquisitionEra": "ReleaseValidationNewConditions",
                "ProcessingVersion": 3,
                "ProcessingString": "Test",
                "KeepOutput": False
            },
            "Task4": {
                "TaskName": "Reco",
                "InputTask": "DigiHLT_new",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['Reco'],
                "SplittingAlgo": "FileBased",
                "TransientOutputModules": ["writeRECO"]
            },
            "Task5": {
                "TaskName": "ALCAReco",
                "InputTask": "DigiHLT_ref",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['ALCAReco'],
                "SplittingAlgo": "LumiBased",
            },
            "Task6": {
                "TaskName": "Skims",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeRECO",
                "ConfigCacheID": processorDocs['Skims'],
                "SplittingAlgo": "LumiBased",
            }
        }
        testArguments.update(arguments)
        arguments = testArguments

        factory = TaskChainWorkloadFactory()

        # Test a malformed task chain definition
        arguments['Task4']['TransientOutputModules'].append('writeAOD')
        self.assertRaises(WMSpecFactoryException, factory.validateSchema,
                          arguments)
        arguments['Task4']['TransientOutputModules'].remove('writeAOD')

        try:
            self.workload = factory.factoryWorkloadConstruction(
                "PullingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            import traceback
            traceback.print_exc()
            self.fail(msg)

        testWMBSHelper = WMBSHelper(self.workload,
                                    "GenSim",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # Check each task in the chain against its request arguments
        firstTask = self.workload.getTaskByPath("/PullingTheChain/GenSim")
        self._checkTask(firstTask, arguments['Task1'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new"),
            arguments['Task2'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref"),
            arguments['Task3'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco"
            ), arguments['Task4'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_ref/ALCAReco"
            ), arguments['Task5'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/PullingTheChain/GenSim/GenSimMergewriteGENSIM/DigiHLT_new/Reco/Skims"
            ), arguments['Task6'], arguments)

        # Verify the output datasets
        outputDatasets = self.workload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 11,
                         "Number of output datasets doesn't match")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM" in
            outputDatasets,
            "/RelValTTBar/ReleaseValidation-GenSimFilter-v1/GEN-SIM not in output datasets"
        )
        self.assertFalse(
            "/RelValTTBar/ReleaseValidation-reco-v1/RECO" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-reco-v1/RECO in output datasets")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
            "/RelValTTBar/ReleaseValidation-AOD-v1/AOD not in output datasets")
        self.assertTrue(
            "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO" in
            outputDatasets,
            "/RelValTTBar/ReleaseValidation-alca-v1/ALCARECO not in output datasets"
        )
        for i in range(1, 5):
            self.assertTrue(
                "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO" % i in
                outputDatasets,
                "/RelValTTBar/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets"
                % i)
        # skim2 is in IgnoredOutputModules, so its dataset must be absent
        for i in range(1, 6):
            if i == 2:
                continue
            self.assertTrue(
                "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD" % i in
                outputDatasets,
                "/RelValTTBar/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets"
                % i)
        return

    def _checkTask(self, task, taskConf, centralConf):
        """
        _checkTask_

        Verify the correctness of the task: input step wiring, pileup
        configuration, output filesets (merged/unmerged), merge tasks,
        dataset naming, and the WMBS subscription created for it.
        """
        if taskConf.get("InputTask") is not None:
            inpTaskPath = task.getPathName()
            inpTaskPath = inpTaskPath.replace(task.name(), "")
            inpTaskPath += "cmsRun1"
            self.assertEqual(task.data.input.inputStep, inpTaskPath,
                             "Input step is wrong in the spec")
            self.assertTrue(
                taskConf["InputTask"] in inpTaskPath,
                "Input task is not in the path name for child task")

        if "MCPileup" in taskConf or "DataPileup" in taskConf:
            mcDataset = taskConf.get('MCPileup', None)
            dataDataset = taskConf.get('DataPileup', None)
            if mcDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset,
                                 [mcDataset])
            if dataDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset,
                                 [dataDataset])

        workflow = Workflow(name=self.workload.name(),
                            task=task.getPathName())
        workflow.load()

        # Ignored output modules produce no filesets
        outputMods = outputModuleList(task)
        ignoredOutputMods = task.getIgnoredOutputModulesForTask()
        outputMods = set(outputMods) - set(ignoredOutputMods)

        self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                         "Error: Wrong number of WF outputs")

        for outputModule in outputMods:
            filesets = workflow.outputMap[outputModule][0]
            merged = filesets['merged_output_fileset']
            unmerged = filesets['output_fileset']
            merged.loadData()
            unmerged.loadData()

            mergedset = task.getPathName() + "/" + task.name(
            ) + "Merge" + outputModule + "/merged-Merged"
            # Outputs that are never merged (logArchive, dropped, transient or
            # ignored modules) point their "merged" fileset at the unmerged one
            if outputModule == "logArchive" or not taskConf.get("KeepOutput", True) \
                    or outputModule in taskConf.get("TransientOutputModules", []) \
                    or outputModule in centralConf.get("IgnoredOutputModules", []):
                mergedset = task.getPathName() + "/unmerged-" + outputModule
            unmergedset = task.getPathName() + "/unmerged-" + outputModule

            self.assertEqual(mergedset, merged.name,
                             "Merged fileset name is wrong")
            self.assertEqual(unmergedset, unmerged.name,
                             "Unmerged fileset name is wrong")

            if outputModule != "logArchive" and taskConf.get("KeepOutput", True) \
                    and outputModule not in taskConf.get("TransientOutputModules", []) \
                    and outputModule not in centralConf.get("IgnoredOutputModules", []):
                # A real merge task must exist for this output module
                mergeTask = task.getPathName() + "/" + task.name(
                ) + "Merge" + outputModule
                mergeWorkflow = Workflow(name=self.workload.name(),
                                         task=mergeTask)
                mergeWorkflow.load()
                self.assertTrue(
                    "Merged" in mergeWorkflow.outputMap,
                    "Merge workflow does not contain a Merged output key")
                mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
                mergedFileset = mergedOutputMod['merged_output_fileset']
                unmergedFileset = mergedOutputMod['output_fileset']
                mergedFileset.loadData()
                unmergedFileset.loadData()
                self.assertEqual(mergedFileset.name, mergedset,
                                 "Merged fileset name in merge task is wrong")
                self.assertEqual(
                    unmergedFileset.name, mergedset,
                    "Unmerged fileset name in merge task is wrong")

                mrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                    'merged_output_fileset']
                umrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                    'output_fileset']
                mrgLogArch.loadData()
                umrgLogArch.loadData()
                archName = task.getPathName() + "/" + task.name(
                ) + "Merge" + outputModule + "/merged-logArchive"
                self.assertEqual(
                    mrgLogArch.name, archName,
                    "LogArchive merged fileset name is wrong in merge task")
                self.assertEqual(
                    umrgLogArch.name, archName,
                    "LogArchive unmerged fileset name is wrong in merge task")

            if outputModule != "logArchive":
                taskOutputMods = task.getOutputModulesForStep(
                    stepName="cmsRun1")
                currentModule = getattr(taskOutputMods, outputModule)
                if taskConf.get("PrimaryDataset") is not None:
                    self.assertEqual(currentModule.primaryDataset,
                                     taskConf["PrimaryDataset"],
                                     "Wrong primary dataset")
                # FIX: this used to be a single string
                # "AcquisitionEra, ProcessingString, ProcessingVersion", so no
                # part was ever found in taskConf and the checks below were
                # silently skipped.
                processedDatasetParts = [
                    "AcquisitionEra", "ProcessingString", "ProcessingVersion"
                ]
                allParts = True
                for part in processedDatasetParts:
                    if part in taskConf:
                        # FIX: compare the part's *value* (was the key name,
                        # which never matches the dataset string)
                        self.assertTrue(
                            str(taskConf[part]) in
                            currentModule.processedDataset,
                            "Wrong processed dataset for module")
                    else:
                        allParts = False
                if allParts:
                    # FIX: assertEqual was missing the actual value and
                    # compared the expected name against the failure message
                    self.assertEqual(
                        currentModule.processedDataset,
                        "%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                       taskConf["ProcessingString"],
                                       taskConf["ProcessingVersion"]),
                        "Wrong processed dataset for module")

        # Test subscriptions
        if taskConf.get("InputTask") is None:
            # Top-level task: fileset is named after workload/task/block
            inputFileset = "%s-%s-SomeBlock" % (self.workload.name(),
                                                task.name())
        elif "Merge" in task.getPathName().split("/")[-2]:
            # Fed by a merge task: reads the merged fileset
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "merged-Merged"
        else:
            # Fed directly by a processing task: reads the unmerged fileset
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "unmerged-%s" % taskConf[
                "InputFromOutputModule"]
        taskFileset = Fileset(name=inputFileset)
        taskFileset.loadData()

        taskSubscription = Subscription(fileset=taskFileset,
                                        workflow=workflow)
        taskSubscription.loadData()

        if taskConf.get("InputTask") is None and taskConf.get(
                "InputDataset") is None:
            # Production type
            self.assertEqual(
                taskSubscription["type"], "Production",
                "Error: Wrong subscription type for processing task")
            self.assertEqual(taskSubscription["split_algo"],
                             taskConf["SplittingAlgo"],
                             "Error: Wrong split algo for generation task")
        else:
            # Processing type
            self.assertEqual(taskSubscription["type"], "Processing",
                             "Wrong subscription type for task")
            if taskSubscription["split_algo"] != "WMBSMergeBySize":
                self.assertEqual(taskSubscription["split_algo"],
                                 taskConf['SplittingAlgo'],
                                 "Splitting algo mismatch")
            else:
                # WMBSMergeBySize is only legitimate on unmerged input
                self.assertEqual(
                    taskFileset.name, inpTaskPath +
                    "unmerged-%s" % taskConf["InputFromOutputModule"],
                    "Subscription uses WMBSMergeBySize on a merge fileset")
        return

    def testMultipleGlobalTags(self):
        """
        _testMultipleGlobalTags_

        Test creating a workload that starts in a processing task
        with an input dataset, and has different globalTags
        and CMSSW versions (with corresponding scramArch) in each task
        """
        processorDocs = makeProcessingConfigs(self.configDatabase)
        testArguments = TaskChainWorkloadFactory.getTestArguments()
        lumiDict = {"1": [[2, 4], [8, 50]], "2": [[100, 200], [210, 210]]}
        lumiDict2 = {"1": [[2, 4], [8, 40]], "2": [[100, 150], [210, 210]]}
        arguments = {
            "AcquisitionEra": "ReleaseValidation",
            "Requestor": "*****@*****.**",
            "CMSSWVersion": "CMSSW_3_5_8",
            "ScramArch": "slc5_ia32_gcc434",
            "ProcessingVersion": 1,
            "GlobalTag": "DefaultGlobalTag",
            "LumiList": lumiDict,
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
            "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
            "DashboardHost": "127.0.0.1",
            "DashboardPort": 8884,
            "TaskChain": 4,
            "Task1": {
                "TaskName": "DigiHLT",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "InputDataset": "/MinimumBias/Commissioning10-v4/GEN-SIM",
                "SplittingAlgo": "FileBased",
            },
            "Task2": {
                "TaskName": "Reco",
                "InputTask": "DigiHLT",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['Reco'],
                "SplittingAlgo": "FileBased",
                "GlobalTag": "GlobalTagForReco",
                "CMSSWVersion": "CMSSW_3_1_2",
                "ScramArch": "CompatibleRECOArch",
                "PrimaryDataset": "ZeroBias",
                "LumiList": lumiDict2,
            },
            "Task3": {
                "TaskName": "ALCAReco",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeALCA",
                "ConfigCacheID": processorDocs['ALCAReco'],
                "SplittingAlgo": "FileBased",
                "GlobalTag": "GlobalTagForALCAReco",
                "CMSSWVersion": "CMSSW_3_1_3",
                "ScramArch": "CompatibleALCAArch",
            },
            "Task4": {
                "TaskName": "Skims",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeRECO",
                "ConfigCacheID": processorDocs['Skims'],
                "SplittingAlgo": "FileBased",
            }
        }
        testArguments.update(arguments)
        arguments = testArguments

        factory = TaskChainWorkloadFactory()
        try:
            self.workload = factory.factoryWorkloadConstruction(
                "YankingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)

        testWMBSHelper = WMBSHelper(self.workload,
                                    "DigiHLT",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._checkTask(
            self.workload.getTaskByPath("/YankingTheChain/DigiHLT"),
            arguments['Task1'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
            arguments['Task2'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"
            ), arguments['Task3'], arguments)
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"
            ), arguments['Task4'], arguments)

        # Task1 inherits global tag/version/arch and the global lumi mask
        digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
        self.assertEqual(lumiDict, digi.getLumiMask())
        digiStep = digi.getStepHelper("cmsRun1")
        self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
        self.assertEqual(digiStep.getCMSSWVersion(),
                         arguments['CMSSWVersion'])
        self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])

        # Make sure this task has a different lumilist than the global one
        reco = self.workload.getTaskByPath(
            "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco")
        self.assertEqual(lumiDict2, reco.getLumiMask())
        recoStep = reco.getStepHelper("cmsRun1")
        self.assertEqual(recoStep.getGlobalTag(),
                         arguments['Task2']['GlobalTag'])
        self.assertEqual(recoStep.getCMSSWVersion(),
                         arguments['Task2']['CMSSWVersion'])
        self.assertEqual(recoStep.getScramArch(),
                         arguments['Task2']['ScramArch'])

        alca = self.workload.getTaskByPath(
            "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"
        )
        self.assertEqual(lumiDict, alca.getLumiMask())
        alcaStep = alca.getStepHelper("cmsRun1")
        self.assertEqual(alcaStep.getGlobalTag(),
                         arguments['Task3']['GlobalTag'])
        self.assertEqual(alcaStep.getCMSSWVersion(),
                         arguments['Task3']['CMSSWVersion'])
        self.assertEqual(alcaStep.getScramArch(),
                         arguments['Task3']['ScramArch'])

        # Task4 sets no overrides, so it falls back to the global values
        skim = self.workload.getTaskByPath(
            "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"
        )
        skimStep = skim.getStepHelper("cmsRun1")
        self.assertEqual(skimStep.getGlobalTag(), arguments['GlobalTag'])
        self.assertEqual(skimStep.getCMSSWVersion(),
                         arguments['CMSSWVersion'])
        self.assertEqual(skimStep.getScramArch(), arguments['ScramArch'])

        # Verify the output datasets
        outputDatasets = self.workload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 14,
                         "Number of output datasets doesn't match")
        self.assertTrue(
            "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI" in
            outputDatasets,
            "/MinimumBias/ReleaseValidation-RawDigiFilter-v1/RAW-DIGI not in output datasets"
        )
        self.assertTrue(
            "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI"
            in outputDatasets,
            "/MinimumBias/ReleaseValidation-RawDebugDigiFilter-v1/RAW-DEBUG-DIGI not in output datasets"
        )
        self.assertTrue(
            "/ZeroBias/ReleaseValidation-reco-v1/RECO" in outputDatasets,
            "/ZeroBias/ReleaseValidation-reco-v1/RECO not in output datasets")
        self.assertTrue(
            "/ZeroBias/ReleaseValidation-AOD-v1/AOD" in outputDatasets,
            "/ZeroBias/ReleaseValidation-AOD-v1/AOD not in output datasets")
        self.assertTrue(
            "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO" in outputDatasets,
            "/ZeroBias/ReleaseValidation-alca-v1/ALCARECO not in output datasets"
        )
        for i in range(1, 5):
            self.assertTrue(
                "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO" % i in
                outputDatasets,
                "/MinimumBias/ReleaseValidation-alca%d-v1/ALCARECO not in output datasets"
                % i)
        for i in range(1, 6):
            self.assertTrue(
                "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD" % i in
                outputDatasets,
                "/MinimumBias/ReleaseValidation-skim%d-v1/RECO-AOD not in output datasets"
                % i)
        return

    def testMultithreadedTaskChain(self):
        """
        Test for multithreaded task chains where all steps run with the same
        number of cores
        """
        arguments = self.buildMultithreadedTaskChain(self.differentNCores)
        arguments['Task1']['Multicore'] = 4
        arguments['Task2']['Multicore'] = 4
        arguments['Task3']['Multicore'] = 4
        factory = TaskChainWorkloadFactory()
        try:
            self.workload = factory.factoryWorkloadConstruction(
                "MultiChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)

        hlt = self.workload.getTaskByPath('/MultiChain/HLTD')
        reco = self.workload.getTaskByPath(
            '/MultiChain/HLTD/HLTDMergewriteRAWDIGI/RECODreHLT')
        miniAOD = self.workload.getTaskByPath(
            '/MultiChain/HLTD/HLTDMergewriteRAWDIGI/RECODreHLT/RECODreHLTMergewriteALCA/MINIAODDreHLT'
        )

        hltStep = hlt.getStepHelper("cmsRun1")
        recoStep = reco.getStepHelper("cmsRun1")
        miniAODStep = miniAOD.getStepHelper("cmsRun1")
        self.assertEqual(hltStep.getNumberOfCores(), 4)
        self.assertEqual(recoStep.getNumberOfCores(), 4)
        self.assertEqual(miniAODStep.getNumberOfCores(), 4)
        return

    def testMultithreadedTasksTaskChain(self):
        """
        Test for multithreaded task chains where each step may run with a
        different number of cores
        """
        arguments = self.buildMultithreadedTaskChain(self.differentNCores)
        factory = TaskChainWorkloadFactory()
        try:
            self.workload = factory.factoryWorkloadConstruction(
                "MultiChain2", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)

        hlt = self.workload.getTaskByPath('/MultiChain2/HLTD')
        reco = self.workload.getTaskByPath(
            '/MultiChain2/HLTD/HLTDMergewriteRAWDIGI/RECODreHLT')
        miniAOD = self.workload.getTaskByPath(
            '/MultiChain2/HLTD/HLTDMergewriteRAWDIGI/RECODreHLT/RECODreHLTMergewriteALCA/MINIAODDreHLT'
        )

        # Per-task core counts from the fixture drive memory requirements
        hltMemory = hlt.jobSplittingParameters(
        )['performance']['memoryRequirement']
        recoMemory = reco.jobSplittingParameters(
        )['performance']['memoryRequirement']
        aodMemory = miniAOD.jobSplittingParameters(
        )['performance']['memoryRequirement']

        hltStep = hlt.getStepHelper("cmsRun1")
        recoStep = reco.getStepHelper("cmsRun1")
        miniAODStep = miniAOD.getStepHelper("cmsRun1")
        self.assertEqual(hltStep.getNumberOfCores(), 4)
        self.assertEqual(recoStep.getNumberOfCores(), 8)
        self.assertEqual(miniAODStep.getNumberOfCores(), 1)
        self.assertEqual(recoMemory, 8000.0)
        self.assertEqual(aodMemory, 2000.0)
        self.assertEqual(hltMemory, 4000.0)
        return

    def testPileupTaskChain(self):
        """
        Test a task chain where the first task mixes in MC pileup (with
        deterministic pileup enabled) and the second task mixes in data
        pileup, verifying the pileup configuration on each cmsRun step.
        """
        processorDocs = makeProcessingConfigs(self.configDatabase)
        testArguments = TaskChainWorkloadFactory.getTestArguments()
        arguments = {
            "AcquisitionEra": "ReleaseValidation",
            "Requestor": "*****@*****.**",
            "CMSSWVersion": "CMSSW_3_5_8",
            "ScramArch": "slc5_ia32_gcc434",
            "ProcessingVersion": 1,
            "GlobalTag": "GR10_P_v4::All",
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
            "SiteWhitelist": ["T1_CH_CERN", "T1_US_FNAL"],
            "DashboardHost": "127.0.0.1",
            "DashboardPort": 8884,
            "TaskChain": 2,
            "Task1": {
                "InputDataset": "/cosmics/whatever-input-v1/GEN-SIM",
                "TaskName": "DIGI",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "LumiBased",
                "LumisPerJob": 4,
                "MCPileup": "/some/cosmics-mc-v1/GEN-SIM",
                "DeterministicPileup": True,
                "CMSSWVersion": "CMSSW_5_2_6",
                "GlobalTag": "GR_39_P_V5:All",
                "PrimaryDataset": "PURelValTTBar",
                "AcquisitionEra": "CMSSW_5_2_6",
                "ProcessingString": "ProcStr_Task1"
            },
            "Task2": {
                "TaskName": "RECO",
                "InputTask": "DIGI",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['Reco'],
                "DataPileup": "/some/minbias-data-v1/GEN-SIM",
                "SplittingAlgo": "LumiBased",
                "LumisPerJob": 2,
                "GlobalTag": "GR_R_62_V3::All",
                "AcquisitionEra": "CMSSW_5_2_7",
                "ProcessingString": "ProcStr_Task2"
            },
        }
        testArguments.update(arguments)
        arguments = testArguments

        factory = TaskChainWorkloadFactory()
        self.workload = factory.factoryWorkloadConstruction(
            "PullingTheChain", arguments)

        firstTask = self.workload.getTaskByPath("/PullingTheChain/DIGI")
        cmsRunStep = firstTask.getStep("cmsRun1").getTypeHelper()
        pileupData = cmsRunStep.getPileup()
        # Task1 has only MC pileup, and deterministic pileup enabled
        self.assertFalse(hasattr(pileupData, "data"))
        self.assertEqual(pileupData.mc.dataset,
                         ["/some/cosmics-mc-v1/GEN-SIM"])
        splitting = firstTask.jobSplittingParameters()
        self.assertTrue(splitting["deterministicPileup"])

        secondTask = self.workload.getTaskByPath(
            "/PullingTheChain/DIGI/DIGIMergewriteRAWDIGI/RECO")
        cmsRunStep = secondTask.getStep("cmsRun1").getTypeHelper()
        pileupData = cmsRunStep.getPileup()
        # Task2 has only data pileup, and deterministic pileup off by default
        self.assertFalse(hasattr(pileupData, "mc"))
        self.assertEqual(pileupData.data.dataset,
                         ["/some/minbias-data-v1/GEN-SIM"])
        splitting = secondTask.jobSplittingParameters()
        self.assertFalse(splitting["deterministicPileup"])

    def buildMultithreadedTaskChain(self, filename):
        """
        Build a TaskChain request argument dict from several sources: the
        factory test arguments, the JSON request fixture at *filename*, and
        local couch/config-cache overrides.  Returns the merged dict.
        """
        processorDocs = makeProcessingConfigs(self.configDatabase)
        testArguments = TaskChainWorkloadFactory.getTestArguments()

        # Read in the request (close the file handle instead of leaking it)
        with open(filename) as requestFile:
            request = json.load(requestFile)

        # Construct args from the pieces starting with test args ...
        arguments = testArguments

        # ... continuing with the request
        for key in [
                'CMSSWVersion', 'ScramArch', 'GlobalTag', 'ProcessingVersion',
                'Multicore', 'Memory', 'TaskChain', 'Task1', 'Task2', 'Task3'
        ]:
            arguments.update({key: request['createRequest'][key]})

        for key in ['SiteBlacklist']:
            arguments.update({key: request['assignRequest'][key]})

        # ... then some local overrides
        del arguments['ConfigCacheID']
        del arguments['ConfigCacheUrl']
        arguments.update({
            "CouchURL": self.testInit.couchUrl,
            "CouchDBName": self.testInit.couchDbName,
        })

        # ... now fill in the ConfigCache documents created and override the
        # inputs to link them up
        arguments['Task1']['ConfigCacheID'] = processorDocs['DigiHLT']
        arguments['Task2']['ConfigCacheID'] = processorDocs['Reco']
        arguments['Task2']['InputFromOutputModule'] = 'writeRAWDIGI'
        arguments['Task3']['ConfigCacheID'] = processorDocs['ALCAReco']
        arguments['Task3']['InputFromOutputModule'] = 'writeALCA'

        return arguments
class PrivateMCTest(unittest.TestCase):
    """
    Tests for the PrivateMC workload factory: builds a PrivateMC workload
    against the couch/WMBS test databases and verifies its WMBS artifacts.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("privatemc_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("privatemc_t")
        self.testDir = self.testInit.generateWorkDir()
        return

    def injectAnalysisConfig(self):
        """
        Create a bogus config cache document for the analysis workflow and
        inject it into couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["pset_hash"] = "21cb400c6ad63c3a97fa93f8e8785127"
        newConfig["owner"] = {"group": "Analysis", "user": "******"}
        # Two output modules so the workload exposes OutputA/OutputB filesets
        newConfig["pset_tweak_details"] ={"process": {"outputModules_": ["OutputA", "OutputB"],
                                                      "OutputA": {"dataset": {"filterName": "OutputAFilter",
                                                                              "dataTier": "RECO"}},
                                                      "OutputB": {"dataset": {"filterName": "OutputBFilter",
                                                                              "dataTier": "USER"}}}}
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testPrivateMC(self):
        """
        _testPrivateMC_

        Build a PrivateMC workload, create its WMBS subscriptions, and check
        the workflow output map, filesets and subscription types/algos.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "privatemc_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 1

        processingFactory = PrivateMCWorkloadFactory()
        testWorkload = processingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC")
        procWorkflow.load()
        # logArchive + OutputA + OutputB
        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs: %s" % len(procWorkflow.outputMap.keys()))

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]#Actually Analysis does not have a merge task
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # With no merge task, merged and unmerged filesets both point at the
        # unmerged name for each output module
        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        # Top-level subscription: PrivateMC type, event-based splitting
        topLevelFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "PrivateMC",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        # LogCollect subscription over the unmerged logArchive fileset
        procLogCollect = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
class ServiceTest(unittest.TestCase):
    """
    Unit tests for WMCore.Services.Service: cache naming, cache lifetime,
    stale-cache behaviour, timeouts and HTTP failure handling.
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.testInit = TestInit(__file__)
        self.testDir = self.testInit.generateWorkDir()
        testname = self.id().split('.')[-1]

        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%m-%d %H:%M',
                            filename='service_unittests.log',
                            filemode='w')

        logger_name = 'Service%s' % testname.replace('test', '', 1)
        self.logger = logging.getLogger(logger_name)

        test_dict = {'logger': self.logger,
                     'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi'}
        self.myService = Service(test_dict)

        # A second service pointing at a different host, so cache-name tests
        # can verify that the hostname is folded into the cache identity.
        test_dict['endpoint'] = 'http://cmssw-test.cvs.cern.ch/cgi-bin/cmssw.cgi'
        self.myService2 = Service(test_dict)
        self.testUrl = 'http://cern.ch'

        self.port = 8888
        cherrypy.config.update({'server.socket_port': self.port})

    def tearDown(self):
        """Remove the work dir and log whether the test passed or failed."""
        testname = self.id().split('.')[-1]
        self.testInit.delWorkDir()
        # NOTE(review): _exc_info() is a private unittest API from the
        # Python 2 era -- confirm a replacement when porting to Python 3.
        if self._exc_info()[0] is None:
            self.logger.info('test "%s" passed' % testname)
        else:
            self.logger.info('test "%s" failed' % testname)

    def testClear(self):
        """
        Populate the cache, and then check that it's deleted
        """
        f = self.myService.refreshCache('testClear',
                                        '/COMP/WMCORE/src/python/WMCore/Services/Service.py?view=markup')
        self.assertTrue(os.path.exists(f.name))
        f.close()

        self.myService.clearCache('testClear')
        self.assertFalse(os.path.exists(f.name))

    def testClearAndRepopulate(self):
        """
        Populate the cache, delete it, then check it can be repopulated
        """
        f = self.myService.refreshCache('testClear',
                                        '/COMP/WMCORE/src/python/WMCore/Services/Service.py?view=markup')
        self.assertTrue(os.path.exists(f.name))
        f.close()

        self.myService.clearCache('testClear')
        self.assertFalse(os.path.exists(f.name))

        f = self.myService.refreshCache('testClear',
                                        '/COMP/WMCORE/src/python/WMCore/Services/Service.py?view=markup')
        self.assertTrue(os.path.exists(f.name))
        f.close()

    def testCachePath(self):
        """The endpoint hostname is appended to a caller-provided cachepath."""
        cache_path = tempfile.mkdtemp()
        # renamed from 'dict' -- don't shadow the builtin
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                  'cachepath': cache_path,
                  'req_cache_path': '%s/requests' % cache_path}
        service = Service(config)
        # We append hostname to the cachepath, so that we can talk to two
        # services on different hosts
        self.assertEqual(service['cachepath'],
                         '%s/cmssw.cvs.cern.ch' % config['cachepath'])
        shutil.rmtree(cache_path, ignore_errors=True)

    @attr("integration")
    def testCacheLifetime(self):
        """Cache deleted if created by Service - else left alone"""
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                  'cacheduration': 100}
        os.environ.pop('TMPDIR', None)  # Mac sets this by default
        service = Service(config)
        cache_path = service['cachepath']
        self.assertTrue(os.path.isdir(cache_path))
        del service
        # The service created this directory, so deleting the service removes it
        self.assertFalse(os.path.exists(cache_path))

        cache_path = tempfile.mkdtemp()
        config['cachepath'] = cache_path
        service = Service(config)
        del service
        # A caller-provided cache dir must survive service deletion
        self.assertTrue(os.path.isdir(cache_path))
        Permissions.owner_readwriteexec(cache_path)

    def testCachePermissions(self):
        """Raise error if pre-defined cache permission loose"""
        cache_path = tempfile.mkdtemp()
        sub_cache_path = os.path.join(cache_path, 'cmssw.cvs.cern.ch')
        # 0o777 is deliberately too permissive; Service must refuse it.
        # (was the Python-2-only literal 0777)
        os.makedirs(sub_cache_path, 0o777)
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                  'cacheduration': 100,
                  'cachepath': cache_path}
        self.assertRaises(AssertionError, Service, config)

    def testCacheDuration(self):
        """An explicit cacheduration is stored unchanged."""
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                  'cacheduration': 100}
        service = Service(config)
        self.assertEqual(service['cacheduration'], config['cacheduration'])

    def testNoCacheDuration(self):
        """A None cacheduration is stored unchanged (no default substituted)."""
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi',
                  'cacheduration': None}
        service = Service(config)
        self.assertEqual(service['cacheduration'], config['cacheduration'])

    def testSocketTimeout(self):
        """getData must not leak its timeout into the global socket default."""
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch/',
                  'cacheduration': None,
                  'timeout': 10}
        service = Service(config)
        deftimeout = socket.getdefaulttimeout()
        service.getData('%s/socketresettest' % self.testDir, '/cgi-bin/cmssw.cgi')
        self.assertEqual(deftimeout, socket.getdefaulttimeout())

    def testStaleCache(self):
        """Stale-cache behaviour across the cacheduration/maxcachereuse windows."""
        config = {'logger': self.logger,
                  'endpoint': 'http://cmssw.cvs.cern.ch',
                  'cacheduration': 0.0002,
                  'maxcachereuse': 0.001,
                  'timeout': 10,
                  'usestalecache': True}
        service = Service(config)
        cache = 'stalecachetest'

        # Start test from a clear cache
        service.clearCache(cache)

        cachefile = service.cacheFileName(cache)

        # first check that the exception raises when the file doesn't exist
        self.logger.info('first call to refreshCache - should fail')
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

        cacheddata = 'this data is mouldy'
        with open(cachefile, 'w') as f:
            f.write(cacheddata)

        self.logger.info('second call to refreshCache - should pass')
        data = service.refreshCache(cache, '/lies').read()
        self.assertEqual(cacheddata, data)

        # sleep a while so the file expires in the cache
        # FIXME: RACY
        time.sleep(2)
        self.logger.info('third call to refreshCache - should return stale cache')
        data = service.refreshCache(cache, '/lies').read()
        self.assertEqual(cacheddata, data)

        # sleep a while longer so the cache is dead
        # FIXME: RACY
        time.sleep(5)
        self.logger.info('fourth call to refreshCache - cache should be dead')
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

        # touch the file and expire it
        with open(cachefile, 'w') as f:
            f.write('foo')
        time.sleep(2)

        self.logger.info('fifth call to refreshCache - do not use stale cache')
        # now our service cache is less permissive, the following should fail
        service['usestalecache'] = False
        self.assertRaises(HTTPException, service.refreshCache, cache, '/lies')

        service.cacheFileName(cache)

    def testCacheFileName(self):
        """Hash url + data to get cache file name"""
        hashes = {}
        inputdata = [{}, {'fred': 'fred'},
                     {'fred': 'fred', 'carl': [1, 2]},
                     {'fred': 'fred', 'carl': ["1", "2"]},
                     {'fred': 'fred', 'carl': ["1", "2"], 'jim': {}}]
        for data in inputdata:
            thishash = self.myService.cacheFileName('bob', inputdata=data)
            thishash2 = self.myService2.cacheFileName('bob', inputdata=data)
            # Same input against different hosts must give different names
            self.assertNotEqual(thishash, thishash2)
            self.assertTrue(thishash not in hashes, '%s is not unique' % thishash)
            self.assertTrue(thishash2 not in hashes, '%s is not unique' % thishash2)
            hashes[thishash], hashes[thishash2] = None, None

    @attr("integration")
    def testTruncatedResponse(self):
        """
        _TruncatedResponse_

        A server that truncates its response must surface IncompleteRead.
        """
        cherrypy.tree.mount(CrappyServer())
        cherrypy.engine.start()
        logging.basicConfig(format='%(message)s')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/truncated' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        self.assertRaises(IncompleteRead, myService.getData, 'foo', '')
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def testSlowResponse(self):
        """
        _SlowResponse_

        A server that stalls must time out, and within a sane wall-clock bound.
        """
        cherrypy.tree.mount(SlowServer())
        cherrypy.engine.start()
        logging.basicConfig(format='%(message)s')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/slow' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        startTime = int(time.time())
        self.assertRaises(socket.timeout, myService.getData, 'foo', '')
        self.assertTrue(int(time.time()) - startTime < 130,
                        "Error: Timeout took too long")
        cherrypy.engine.exit()
        cherrypy.engine.stop()

    def testBadStatusLine(self):
        """
        _BadStatusLine_

        A malformed HTTP status line must surface BadStatusLine.
        """
        logging.basicConfig(format='%(message)s')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/badstatus' % self.port,
                     'usestalecache': True}
        myService = Service(test_dict)
        # Have to fudge the status line in the Request object as cherrypy won't
        # Allow bad statuses to be raised
        myService['requests'] = CrappyRequest('http://bad.com', {})
        self.assertRaises(BadStatusLine, myService.getData, 'foo', '')

    def testZ_InterruptedConnection(self):
        """
        _InterruptedConnection_

        What happens if we shut down the server while the connection is still
        active?  Confirm that the cache works as expected
        """
        cherrypy.tree.mount(RegularServer(), "/reg1")
        cherrypy.engine.start()
        logging.basicConfig(format='%(message)s')
        test_dict = {'logger': self.logger,
                     'endpoint': 'http://127.0.0.1:%i/reg1/regular' % self.port,
                     'usestalecache': True,
                     "cacheduration": 0.005}
        myService = Service(test_dict)
        self.assertRaises(HTTPException, myService.getData, 'foo', 'THISISABADURL')

        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Now stop the server and confirm that it is down
        cherrypy.server.stop()
        self.assertRaises(socket.error, myService.forceRefresh, 'foo', '')

        # Make sure we can still read from the cache
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Mount a backup server
        del cherrypy.tree.apps['/reg1']
        cherrypy.tree.mount(BackupServer(), "/reg1")

        # Expire cache
        time.sleep(30)
        self.assertRaises(socket.error, myService.forceRefresh, 'foo', '')

        # get expired cache results while the server is down
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is silly.")
        data.close()

        # Restart server
        cherrypy.server.start()

        # Confirm new server is in place
        data = myService.refreshCache('foo', '')
        dataString = data.read()
        self.assertEqual(dataString, "This is nuts.")
        data.close()

        cherrypy.engine.exit()
        cherrypy.engine.stop()
        return
class DQMHarvestTests(unittest.TestCase):
    """
    _DQMHarvestTests_

    Tests the DQMHarvest spec file
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("dqmharvest_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("dqmharvest_t")
        self.testInit.generateWorkDir()
        self.workload = None
        self.jsonTemplate = getTestFile('data/ReqMgr/requests/DMWM/DQMHarvesting.json')
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def injectDQMHarvestConfig(self):
        """
        _injectDQMHarvest_

        Create a bogus config cache document for DQMHarvest and
        inject it into couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e234f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
        newConfig["owner"] = {"group": "DATAOPS", "user": "******"}
        newConfig["pset_tweak_details"] = {"process": {"outputModules_": []}}
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]

    def testDQMHarvest(self):
        """
        Build a DQMHarvest workload and verify its properties, tasks and steps.
        """
        testArguments = DQMHarvestWorkloadFactory.getTestArguments()

        # Read in the request (close the template file instead of leaking it)
        with open(self.jsonTemplate) as jsonFile:
            request = json.load(jsonFile)
        testArguments.update(request['createRequest'])
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "dqmharvest_t",
            "DQMConfigCacheID": self.injectDQMHarvestConfig()
        })
        testArguments.pop("ConfigCacheID", None)

        factory = DQMHarvestWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test workload properties
        self.assertEqual(testWorkload.getDashboardActivity(), "harvesting")
        self.assertEqual(testWorkload.getCampaign(), "Campaign-OVERRIDE-ME")
        self.assertEqual(testWorkload.getAcquisitionEra(), "CMSSW_7_3_1_patch1")
        self.assertEqual(testWorkload.getProcessingString(), "GR_R_73_V0A_TEST_RelVal_jetHT2012c")
        self.assertEqual(testWorkload.getProcessingVersion(), 1)
        self.assertFalse(testWorkload.getPrepID(), "PrepId does not match")
        self.assertEqual(testWorkload.getCMSSWVersions(), ['CMSSW_7_3_1_patch1'])

        # test workload attributes
        self.assertEqual(testWorkload.processingString, "GR_R_73_V0A_TEST_RelVal_jetHT2012c")
        self.assertEqual(testWorkload.acquisitionEra, "CMSSW_7_3_1_patch1")
        self.assertEqual(testWorkload.processingVersion, 1)
        self.assertEqual(sorted(testWorkload.lumiList.keys()),
                         ['139788', '139790', '144011'])
        self.assertEqual(sorted(testWorkload.lumiList.values()),
                         [[[5, 10], [15, 20], [25, 30]],
                          [[25, 75], [125, 175], [275, 325]],
                          [[50, 100], [110, 125]]])
        self.assertEqual(testWorkload.data.policies.start.policyName, "Dataset")

        # test workload tasks and steps
        tasks = testWorkload.listAllTaskNames()
        self.assertEqual(len(tasks), 2)
        self.assertEqual(sorted(tasks), ['EndOfRunDQMHarvest', 'EndOfRunDQMHarvestLogCollect'])

        task = testWorkload.getTask(tasks[0])
        self.assertEqual(task.name(), "EndOfRunDQMHarvest")
        self.assertEqual(task.getPathName(), "/TestWorkload/EndOfRunDQMHarvest")
        self.assertEqual(task.taskType(), "Harvesting", "Wrong task type")
        self.assertEqual(task.jobSplittingAlgorithm(), "Harvest", "Wrong job splitting algo")
        self.assertFalse(task.getTrustSitelists().get('trustlists'), "Wrong input location flag")
        self.assertEqual(sorted(task.inputRunWhitelist()),
                         [138923, 138924, 138934, 138937, 139788, 139789, 139790,
                          144011, 144083, 144084, 144086])
        self.assertEqual(sorted(task.listAllStepNames()), ['cmsRun1', 'logArch1', 'upload1'])
        self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
        self.assertEqual(task.getStep("logArch1").stepType(), "LogArchive")
        self.assertEqual(task.getStep("upload1").stepType(), "DQMUpload")
        return

    def testDQMHarvestFailed(self):
        """
        Build a DQMHarvest workload without a DQM config doc; validation must fail.
        """
        testArguments = DQMHarvestWorkloadFactory.getTestArguments()

        # Read in the request (close the template file instead of leaking it)
        with open(self.jsonTemplate) as jsonFile:
            request = json.load(jsonFile)
        testArguments.update(request['createRequest'])
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "dqmharvest_t",
            "ConfigCacheID": self.injectDQMHarvestConfig()
        })
        testArguments.pop("DQMConfigCacheID", None)

        factory = DQMHarvestWorkloadFactory()
        self.assertRaises(WMSpecFactoryException, factory.validateSchema, testArguments)
        return
class testWMConfigCache(unittest.TestCase):
    """
    _testWMConfigCache_

    Basic test class for configCache
    """

    def setUp(self):
        """
        _setUp_
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def testA_basicConfig(self):
        """
        _basicConfig_

        Basic configCache stuff: save, load by ID, delete.
        """
        PSetTweak = "Hello, I am a PSetTweak. It's nice to meet you."

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.loadByID(configCache.getCouchID())

        self.assertEqual(configCache2.getPSetTweaks(), PSetTweak)

        configCache2.delete()
        configCache3 = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())

        # It should fail to load deleted documents
        # (was a manual try/except + flag; assertRaises states the intent)
        self.assertRaises(ConfigCacheException,
                          configCache3.loadByID, configCache.getCouchID())
        return

    def testB_addingConfigsAndTweaks(self):
        """
        _addingConfigsAndTweaks_

        Test adding config files and tweak files
        """
        PSetTweak = "Hello, I am a PSetTweak. It's nice to meet you."
        attach = "Hello, I am an attachment"

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.attachments['attach1'] = attach
        psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.setLabel("sample-label")
        configCache.setDescription("describe this config here")
        configCache.save()
        configString1 = configCache.getConfig()

        configCache2 = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.loadByID(configCache.getCouchID())
        configString2 = configCache2.getConfig()

        # Round-tripping through couch must preserve config and attachments
        self.assertEqual(configString1, configString2)
        self.assertEqual(configCache2.attachments.get('attach1', None), attach)

        configCache.delete()
        return

    def testC_testViews(self):
        """
        _testViews_

        Prototype test for what should be a lot of other tests:
        load a config document via its md5 hash view.
        """
        PSetTweak = "Hello, I am a PSetTweak. It's nice to meet you."
        attach = "Hello, I am an attachment"

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setPSetTweaks(PSetTweak=PSetTweak)
        configCache.attachments['attach1'] = attach
        configCache.document['md5_hash'] = "somemd5"
        psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
        configCache.addConfig(newConfig=psetPath, psetHash=None)
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache2.document['md5_hash'] = configCache.document['md5_hash']
        configCache2.load()

        self.assertEqual(configCache2.attachments.get('attach1', None), attach)
        configCache2.delete()
        return

    def testD_LoadConfigCache(self):
        """
        _LoadConfigCache_

        Actually load the config cache using plain .load()
        Tests to make sure that if we pass in an id field it gets used to load configs
        """
        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.setLabel("labelA")
        configCache.save()

        configCache2 = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                                   id=configCache.getCouchID(),
                                   rev=configCache.getCouchRev())
        configCache2.load()

        self.assertEqual(configCache2.document['owner'],
                         {'group': 'testGroup', 'user': '******'})
        self.assertEqual(configCache2.document['description'],
                         {'config_desc': None, 'config_label': 'labelA'})
        return

    def testE_SaveConfigFileToDisk(self):
        """
        _SaveConfigFileToDisk_

        Check and see if we can save the config file attachment to disk
        """
        targetFile = os.path.join(self.testDir, 'configCache.test')

        configCache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCache.createUserGroup(groupname="testGroup", username='******')
        configCache.attachments['configFile'] = 'ThisIsAConfigFile'

        configCache.saveConfigToDisk(targetFile=targetFile)

        # Read back with a context manager instead of leaking the handle
        with open(targetFile, 'r') as f:
            content = f.read()

        self.assertEqual(content, configCache.getConfig())
        return

    def testListAllConfigs(self):
        """
        _testListAllConfigs_

        Verify that the list all configs method works correctly.
        """
        configCacheA = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCacheA.createUserGroup(groupname="testGroup", username='******')
        configCacheA.setLabel("labelA")
        configCacheA.save()

        configCacheB = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
        configCacheB.createUserGroup(groupname="testGroup", username='******')
        configCacheB.setLabel("labelB")
        configCacheB.save()

        configs = configCacheA.listAllConfigsByLabel()

        self.assertEqual(len(configs), 2,
                         "Error: There should be two configs")
        self.assertEqual(configs["labelA"], configCacheA.getCouchID(),
                         "Error: Label A is wrong.")
        self.assertEqual(configs["labelB"], configCacheB.getCouchID(),
                         "Error: Label B is wrong.")
        return
class RepackTests(unittest.TestCase):
    """
    Tests for the Repack T0 workload spec: WMBS installation, resource
    settings propagation, and fileset/subscription layout.
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)
        self.testDir = self.testInit.generateWorkDir()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
        self.listFilesets = self.daoFactory(classname="Fileset.List")
        self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def testRepack(self):
        """
        _testRepack_

        Create a Repack workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        repackWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Repack")
        repackWorkflow.load()
        # one extra output for the logArchive fileset
        self.assertEqual(len(repackWorkflow.outputMap),
                         len(testArguments["Outputs"]) + 1,
                         "Error: Wrong number of WF outputs in the Repack WF.")

        goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW",
                            "write_PrimaryDataset2_RAW": "RAW"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_PrimaryDataset1_RAW":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/Repack/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap), 3,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Repack")
        topLevelFileset.loadData()

        repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
        repackSubscription.loadData()

        self.assertEqual(repackSubscription["type"], "Repack",
                         "Error: Wrong subscription type.")
        self.assertEqual(repackSubscription["split_algo"], "Repack",
                         "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

        unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW",
                           "write_PrimaryDataset2_RAW": "RAW"}
        for unmergedOutput, tier in viewitems(unmergedOutputs):
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
        repackLogCollect.loadData()
        repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Repack/LogCollect")
        repackLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=repackLogCollect,
                                     workflow=repackLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            repackMergeLogCollect = Fileset(
                name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
            repackMergeLogCollect.loadData()
            repackMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" %
                     (goldenOutputMod, goldenOutputMod))
            repackMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                                  workflow=repackMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            # BUG FIX: these assertions previously checked the outer
            # 'logCollectSub' (already verified above) instead of the
            # per-merge-task subscription loaded in this loop iteration.
            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        return

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory setings are properly propagated to
        all tasks and steps.
        """
        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test default values
        taskPaths = ['/TestWorkload/Repack']
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        testArguments["Multicore"] = 6
        testArguments["Memory"] = 4600.0
        testArguments["EventStreams"] = 3
        testArguments["Outputs"] = deepcopy(REQUEST['Outputs'])
        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                if task == '/TestWorkload/Repack' and step == 'cmsRun1':
                    # only the processing cmsRun step picks up the new settings
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments["Multicore"])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments["EventStreams"])
                elif step in ('stageOut1', 'logArch1'):
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1,
                                     "%s should be single-core" % task)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], testArguments["Memory"])

        return

    def testFilesets(self):
        """
        Test workflow tasks, filesets and subscriptions creation
        """
        # expected tasks, filesets, subscriptions, etc
        expOutTasks = ['/TestWorkload/Repack',
                       '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                       '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW']
        expWfTasks = ['/TestWorkload/Repack',
                      '/TestWorkload/Repack/LogCollect',
                      '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                      '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                      '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect']
        expFsets = ['TestWorkload-Repack-StreamerFiles',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-MergedErrorRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-MergedErrorRAW',
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/unmerged-logArchive']
        subMaps = [(3,
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW/Repackwrite_PrimaryDataset1_RAWMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (6,
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/merged-logArchive',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW/Repackwrite_PrimaryDataset2_RAWMergeLogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (8,
                    '/TestWorkload/Repack/unmerged-logArchive',
                    '/TestWorkload/Repack/LogCollect',
                    'MinFileBased',
                    'LogCollect'),
                   (4,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset1_RAW',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (2,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset1_RAWRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset1_RAW',
                    'RepackMerge',
                    'Merge'),
                   (7,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/RepackCleanupUnmergedwrite_PrimaryDataset2_RAW',
                    'SiblingProcessingBased',
                    'Cleanup'),
                   (5,
                    '/TestWorkload/Repack/unmerged-write_PrimaryDataset2_RAWRAW',
                    '/TestWorkload/Repack/RepackMergewrite_PrimaryDataset2_RAW',
                    'RepackMerge',
                    'Merge'),
                   (1,
                    'TestWorkload-Repack-StreamerFiles',
                    '/TestWorkload/Repack',
                    'Repack',
                    'Repack')]

        testArguments = RepackWorkloadFactory.getTestArguments()
        testArguments.update(deepcopy(REQUEST))

        factory = RepackWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        testWMBSHelper = WMBSHelper(testWorkload, "Repack",
                                    blockName='StreamerFiles',
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks)

        workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload")
        self.assertItemsEqual([item['task'] for item in workflows], expWfTasks)

        # returns a tuple of id, name, open and last_update
        filesets = self.listFilesets.execute()
        self.assertItemsEqual([item[1] for item in filesets], expFsets)

        subscriptions = self.listSubsMapping.execute(workflow="TestWorkload",
                                                     returnTuple=True)
        self.assertItemsEqual(subscriptions, subMaps)
class ReportTest(unittest.TestCase):
    """
    _ReportTest_

    Unit tests for the Report class (WMCore framework job report, FWJR).
    Each test parses a canned CMSSW XML report from the WMCore_t/FwkJobReport_t
    fixture directory and checks the resulting Report object's contents:
    input/output file metadata, errors and exit codes, JSON encoding,
    performance numbers, skipped/fallback/pileup files.
    """

    def setUp(self):
        """
        _setUp_

        Figure out the location of the XML report produced by CMSSW.
        """
        # Couch/database setup is required because some Report operations
        # (e.g. save/strip round trips) use the test work dir and FWJRDump app.
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setupCouch("report_t/fwjrs", "FWJRDump")

        # Canned CMSSW XML job reports used as parsing fixtures.
        testData = os.path.join(getTestBase(), "WMCore_t/FwkJobReport_t")
        # Healthy processing report (the default fixture for most tests).
        self.xmlPath = os.path.join(testData, "CMSSWProcessingReport.xml")
        # Malformed XML, used to exercise parse-failure handling.
        self.badxmlPath = os.path.join(testData, "CMSSWFailReport2.xml")
        # Reports with some / all input files skipped.
        self.skippedFilesxmlPath = os.path.join(
            testData, "CMSSWSkippedNonExistentFile.xml")
        self.skippedAllFilesxmlPath = os.path.join(testData,
                                                   "CMSSWSkippedAll.xml")
        # Reports where input was read via xrootd fallback (one and two files).
        self.fallbackXmlPath = os.path.join(testData, "CMSSWInputFallback.xml")
        self.twoFileFallbackXmlPath = os.path.join(testData,
                                                   "CMSSWTwoFileRemote.xml")
        # Report with pileup (mixing) input files.
        self.pileupXmlPath = os.path.join(testData, "CMSSWPileup.xml")
        # Report that carries per-lumi event counts.
        self.withEventsXmlPath = os.path.join(testData,
                                              "CMSSWWithEventCounts.xml")
        self.testDir = self.testInit.generateWorkDir()
        return

    def tearDown(self):
        """
        _tearDown_

        Cleanup the databases.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def verifyInputData(self, report):
        """
        _verifyInputData_

        Verify that the input file in the Report class matches the input file
        in the XML generated by CMSSW.

        :param report: a parsed Report instance (from CMSSWProcessingReport.xml
            or CMSSWWithEventCounts.xml — both describe the same input file).
        """
        inputFiles = report.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 1, \
            "Error: Wrong number of input files."
        assert inputFiles[0]["lfn"] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong LFN on input file."
        assert inputFiles[0]["pfn"] == "dcap://cmsdca.fnal.gov:24137/pnfs/fnal.gov/usr/cms/WAX/11/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: Wrong PFN on input file."

        # Run/lumi bookkeeping: exactly one run (122023) with one lumi (215).
        inputRun = list(inputFiles[0]["runs"])
        assert len(inputRun) == 1, \
            "Error: Wrong number of runs in input."
        assert inputRun[0].run == 122023, \
            "Error: Wrong run number on input file."
        assert len(inputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in input file."
        assert 215 in inputRun[0].lumis, \
            "Error: Input file is missing lumis."
        assert inputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in input file."
        assert inputFiles[0]["size"] == 0, \
            "Error: Wrong size in input file."
        assert inputFiles[0]["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
            "Error: Catalog on input file is wrong."
        assert inputFiles[0]["guid"] == "142F3F42-C5D6-DE11-945D-000423D94494", \
            "Error: GUID of input file is wrong."
        return

    def verifyRecoOutput(self, report, hasEventCounts=False):
        """
        _verifyRecoOutput_

        Verify that all the metadata in the RECO output module is correct.

        :param report: a parsed Report instance.
        :param hasEventCounts: when True, the fixture is expected to carry
            per-lumi event counts (eventsPerLumi[215] == 2); otherwise the
            per-lumi count is expected to be None.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1",
                                                      "outputRECORECO")

        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputRECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."
        # eventsPerLumi is always a dict keyed by lumi; its values are only
        # populated when the report carries event counts.
        self.assertIsInstance(outputRun[0].eventsPerLumi, dict)
        if hasEventCounts:
            self.assertEqual(outputRun[0].eventsPerLumi[215], 2)
        else:
            self.assertIsNone(outputRun[0].eventsPerLumi[215])

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."
        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."
        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "7E3359C8-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputRECORECO", \
            "Error: Module label of output file is wrong."
        # NOTE: the assertion message below says "Branch has" — presumably a
        # typo for "Branch hash"; left untouched since it is a runtime string.
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def verifyAlcaOutput(self, report, hasEventCounts=False):
        """
        _verifyAlcaOutput_

        Verify that all of the meta data in the ALCARECO output module is
        correct.

        :param report: a parsed Report instance.
        :param hasEventCounts: accepted for symmetry with verifyRecoOutput,
            but see the NOTE on the branch below.
        """
        outputFiles = report.getFilesFromOutputModule("cmsRun1",
                                                      "outputALCARECORECO")
        assert len(outputFiles) == 1, \
            "Error: Wrong number of output files."
        assert outputFiles[0]["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/ALCARECO/rereco_GR09_R_34X_V5_All_v1/0000/B8F849C9-222E-DF11-B2B0-001731230E47.root", \
            "Error: Wrong LFN on output file: %s" % outputFiles[0]["lfn"]
        assert outputFiles[0]["pfn"] == "outputALCARECORECO.root", \
            "Error: Wrong PFN on output file."

        outputRun = list(outputFiles[0]["runs"])
        assert len(outputRun) == 1, \
            "Error: Wrong number of runs in output."
        assert outputRun[0].run == 122023, \
            "Error: Wrong run number on output file."
        assert len(outputRun[0].lumis) == 1, \
            "Error: Wrong number of lumis in output file."
        assert 215 in outputRun[0].lumis, \
            "Error: Output file is missing lumis."
        self.assertIsInstance(outputRun[0].eventsPerLumi, dict)
        # NOTE(review): both branches assert None, so hasEventCounts has no
        # effect here — unlike verifyRecoOutput, which expects a count of 2
        # when hasEventCounts is True. Either the ALCARECO module genuinely
        # never carries per-lumi counts (in which case the if/else is dead
        # code) or the True branch was meant to be assertEqual(..., 2).
        # Confirm against the CMSSWWithEventCounts.xml fixture before changing.
        if hasEventCounts:
            self.assertIsNone(outputRun[0].eventsPerLumi[215])
        else:
            self.assertIsNone(outputRun[0].eventsPerLumi[215])

        assert outputFiles[0]["events"] == 2, \
            "Error: Wrong number of events in output file."
        assert outputFiles[0]["size"] == 0, \
            "Error: Wrong size in output file."
        assert len(outputFiles[0]["input"]) == 1, \
            "Error: Wrong number of input files."
        assert outputFiles[0]["input"][0] == "/store/data/BeamCommissioning09/MinimumBias/RAW/v1/000/122/023/142F3F42-C5D6-DE11-945D-000423D94494.root", \
            "Error: LFN of input file is wrong."
        assert len(outputFiles[0]["checksums"]) == 0, \
            "Error: There should be no checksums in output file."
        assert outputFiles[0]["catalog"] == "", \
            "Error: Catalog on output file is wrong."
        assert outputFiles[0]["guid"] == "B8F849C9-222E-DF11-B2B0-001731230E47", \
            "Error: GUID of output file is wrong: %s" % outputFiles[0]["guid"]
        assert outputFiles[0]["module_label"] == "outputALCARECORECO", \
            "Error: Module label of output file is wrong."
        assert outputFiles[0]["branch_hash"] == "cf37adeb60b427f4ccd0e21b5771146b", \
            "Error: Branch has on output file is wrong."
        return

    def testXMLParsing(self):
        """
        _testParsing_

        Verify that the parsing of a CMSSW XML report works correctly.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        self.verifyInputData(myReport)
        self.verifyRecoOutput(myReport)
        self.verifyAlcaOutput(myReport)
        return

    def testWithEventsXMLParsing(self):
        """
        _testWithEventsXMLParsing_

        Verify that parsing a CMSSW XML report that carries per-lumi event
        counts works correctly.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.withEventsXmlPath)

        self.verifyInputData(myReport)
        self.verifyRecoOutput(myReport, hasEventCounts=True)
        self.verifyAlcaOutput(myReport, hasEventCounts=True)
        return

    def testBadXMLParsing(self):
        """
        _testBadXMLParsing_

        Verify that the parsing of a CMSSW XML report works correctly even if
        the XML is malformed.

        This should raise a FwkJobReportException, which in CMSSW will be
        caught
        """
        myReport = Report("cmsRun1")

        # Local import keeps the exception type out of module scope; it is
        # only needed by this test.
        from WMCore.FwkJobReport.Report import FwkJobReportException
        self.assertRaises(FwkJobReportException, myReport.parse,
                          self.badxmlPath)
        # A failed parse is recorded as a BadFWJRXML error with exit code 50115.
        self.assertEqual(
            myReport.getStepErrors("cmsRun1")['error0'].type, 'BadFWJRXML')
        self.assertEqual(
            myReport.getStepErrors("cmsRun1")['error0'].exitCode, 50115)
        return

    def testErrorReporting(self):
        """
        _testErrorReporting_

        Verify that errors are correctly transfered from the XML report to the
        python report.
        """
        # Expected error detail text, byte-for-byte as it appears in the
        # CMSSWFailReport.xml fixture (note: no trailing newline).
        cmsException = "cms::Exception caught in cmsRun\n"
        cmsException += "---- EventProcessorFailure BEGIN\n"
        cmsException += "EventProcessingStopped\n"
        cmsException += "---- ScheduleExecutionFailure BEGIN\n"
        cmsException += "ProcessingStopped\n"
        cmsException += "---- NoRecord BEGIN\n"
        cmsException += 'No "CastorDbRecord" record found in the EventSetup.\n'
        cmsException += " Please add an ESSource or ESProducer that delivers such a record.\n"
        cmsException += "cms::Exception going through module CastorRawToDigi/castorDigis run: 121849 lumi: 1 event: 23\n"
        cmsException += "---- NoRecord END\n"
        cmsException += "Exception going through path raw2digi_step\n"
        cmsException += "---- ScheduleExecutionFailure END\n"
        cmsException += "an exception occurred during current event processing\n"
        cmsException += "cms::Exception caught in EventProcessor and rethrown\n"
        cmsException += "---- EventProcessorFailure END"

        xmlPath = os.path.join(getTestBase(),
                               "WMCore_t/FwkJobReport_t/CMSSWFailReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1, "errors"), \
            "Error: Error section missing."
        assert getattr(myReport.data.cmsRun1.errors, "errorCount") == 1, \
            "Error: Error count is wrong."
        assert hasattr(myReport.data.cmsRun1.errors, "error0"), \
            "Error: Error0 section is missing."
        assert myReport.data.cmsRun1.errors.error0.type == "CMSException", \
            "Error: Wrong error type."
        assert myReport.data.cmsRun1.errors.error0.exitCode == 8001, \
            "Error: Wrong exit code."
        assert myReport.data.cmsRun1.errors.error0.details == cmsException, \
            "Error: Error details are wrong:\n|%s|\n|%s|" % (myReport.data.cmsRun1.errors.error0.details, cmsException)

        # Test getStepErrors
        self.assertEqual(
            myReport.getStepErrors("cmsRun1")['error0'].type, "CMSException")
        return

    def testMultipleInputs(self):
        """
        _testMultipleInputs_

        Verify that parsing XML reports with multiple inputs works correctly.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWMultipleInput.xml")

        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        assert hasattr(myReport.data.cmsRun1.input, "source"), \
            "Error: Report missing input source."

        inputFiles = myReport.getInputFilesFromStep("cmsRun1")

        assert len(inputFiles) == 2, \
            "Error: Wrong number of input files."

        for inputFile in inputFiles:
            # Common metadata shared by both input files.
            assert inputFile["input_type"] == "primaryFiles", \
                "Error: Wrong input type."
            assert inputFile["module_label"] == "source", \
                "Error: Module label is wrong"
            assert inputFile["catalog"] == "trivialcatalog_file:/uscmst1/prod/sw/cms/SITECONF/T1_US_FNAL/PhEDEx/storage.xml?protocol=dcap", \
                "Error: Catalog is wrong."
            assert inputFile["events"] == 2, \
                "Error: Wrong number of events."
            assert inputFile["input_source_class"] == "PoolSource", \
                "Error: Wrong input source class."

            # The two files are told apart by GUID; each has its own
            # LFN/PFN and lumi section (lumi 1 vs lumi 2, same run).
            if inputFile["guid"] == "F0875ECD-3347-DF11-9FE0-003048678A80":
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/F0875ECD-3347-DF11-9FE0-003048678A80.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 1 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
            else:
                assert inputFile["guid"] == "626D74CE-3347-DF11-9363-0030486790C0", \
                    "Error: Wrong guid."
                assert inputFile["lfn"] == "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input LFN is wrong."
                assert inputFile["pfn"] == "dcap://cmsdca3.fnal.gov:24142/pnfs/fnal.gov/usr/cms/WAX/11/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR10_P_V4_All_v1/0000/626D74CE-3347-DF11-9363-0030486790C0.root", \
                    "Error: Input PFN is wrong."
                assert len(inputFile["runs"]) == 1, \
                    "Error: Wrong number of runs."
                assert list(inputFile["runs"])[0].run == 124216, \
                    "Error: Wrong run number."
                assert 2 in list(inputFile["runs"])[0], \
                    "Error: Wrong lumi sections in input file."
        return

    def testJSONEncoding(self):
        """
        _testJSONEncoding_

        Verify that turning the FWJR into a JSON object works correctly.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        # __to_json__ takes a thunker argument; None is sufficient here.
        jsonReport = myReport.__to_json__(None)

        assert "task" in jsonReport.keys(), \
            "Error: Task name missing from report."

        assert len(jsonReport["steps"].keys()) == 1, \
            "Error: Wrong number of steps in report."
        assert "cmsRun1" in jsonReport["steps"].keys(), \
            "Error: Step missing from json report."

        cmsRunStep = jsonReport["steps"]["cmsRun1"]

        # Every serialized step must carry all of these sections.
        jsonReportSections = [
            "status", "errors", "logs", "parameters", "site", "analysis",
            "cleanup", "input", "output", "start"
        ]
        for jsonReportSection in jsonReportSections:
            assert jsonReportSection in cmsRunStep.keys(), \
                "Error: missing section: %s" % jsonReportSection
        return

    def testTimeSetting(self):
        """
        _testTimeSetting_

        Can we set the times correctly?
        """
        stepName = "cmsRun1"
        # Allowed wall-clock slack between taking time.time() here and the
        # Report recording its own timestamps.
        timeDiff = 0.01
        myReport = Report(stepName)
        localTime = time.time()
        myReport.setStepStartTime(stepName)
        myReport.setStepStopTime(stepName)
        repTime = myReport.getTimes(stepName)
        self.assertTrue(repTime["startTime"] - localTime < timeDiff)
        self.assertTrue(repTime["stopTime"] - localTime < timeDiff)

        # Now check getFirstStartLastStop() across three steps with
        # hand-assigned start/stop times.
        myReport = Report("cmsRun1")
        myReport.addStep("cmsRun2")
        myReport.addStep("cmsRun3")

        step = myReport.retrieveStep("cmsRun1")
        step.startTime = 1
        step.stopTime = 8

        step = myReport.retrieveStep("cmsRun2")
        step.startTime = 2
        step.stopTime = 9

        step = myReport.retrieveStep("cmsRun3")
        step.startTime = 3
        step.stopTime = 10

        # Earliest start (1) and latest stop (10) across all steps.
        self.assertEqual(myReport.getFirstStartLastStop()['stopTime'], 10)
        self.assertEqual(myReport.getFirstStartLastStop()['startTime'], 1)
        return

    def testTaskJobID(self):
        """
        _testTaskJobID_

        Test the basic task and jobID functions
        """
        report = Report('fake')
        # Unset by default.
        self.assertEqual(report.getTaskName(), None)
        self.assertEqual(report.getJobID(), None)
        report.setTaskName('silly')
        report.setJobID(100)
        self.assertEqual(report.getTaskName(), 'silly')
        self.assertEqual(report.getJobID(), 100)
        return

    def test_PerformanceReport(self):
        """
        _PerformanceReport_

        Test the performance report part of the job report
        """
        report = Report("cmsRun1")
        # All four setters (VSize, RSS, %CPU, %MEM) take the same
        # min/max/average triple.
        report.setStepVSize(stepName="cmsRun1", minimum=100, maximum=800,
                            average=244)
        report.setStepRSS(stepName="cmsRun1", minimum=100, maximum=800,
                          average=244)
        report.setStepPCPU(stepName="cmsRun1", minimum=100, maximum=800,
                           average=244)
        report.setStepPMEM(stepName="cmsRun1", minimum=100, maximum=800,
                           average=244)

        perf = report.retrieveStep("cmsRun1").performance
        # Each performance section should echo back exactly what was set.
        for section in perf.dictionary_().values():
            d = section.dictionary_()
            self.assertEqual(d['min'], 100)
            self.assertEqual(d['max'], 800)
            self.assertEqual(d['average'], 244)
        return

    def testPerformanceSummary(self):
        """
        _testPerformanceSummary_

        Test whether or not we can pull performance information out of a
        Timing/SimpleMemoryCheck jobReport
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/PerformanceReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        # Do a brief check of the three sections
        # (note: memory and cpu values are parsed as strings, storage as
        # floats — the assertions below reflect that).
        perf = myReport.data.cmsRun1.performance

        self.assertEqual(perf.memory.PeakValueRss, '492.293')
        self.assertEqual(perf.cpu.TotalJobCPU, '9.16361')
        self.assertEqual(perf.storage.writeTotalMB, 5.22226)
        self.assertAlmostEqual(perf.storage.writeTotalSecs, 0,
                               places=0)  # actual value is 0.06
        self.assertEqual(perf.storage.readPercentageOps, 0.98585512216030857)
        return

    def testPerformanceJSON(self):
        """
        _testPerformanceJSON_

        Verify that the performance section of the report is correctly
        converted to JSON.
        """
        xmlPath = os.path.join(
            getTestBase(), "WMCore_t/FwkJobReport_t/PerformanceReport.xml")
        myReport = Report("cmsRun1")
        myReport.parse(xmlPath)

        perfSection = myReport.__to_json__(
            thunker=None)["steps"]["cmsRun1"]["performance"]

        self.assertTrue("storage" in perfSection,
                        "Error: Storage section is missing.")
        self.assertTrue("memory" in perfSection,
                        "Error: Memory section is missing.")
        self.assertTrue("cpu" in perfSection,
                        "Error: CPU section is missing.")

        # CPU and memory values serialize as strings, storage as floats.
        self.assertEqual(perfSection["cpu"]["AvgEventCPU"], "0.626105",
                         "Error: AvgEventCPU is wrong.")
        self.assertEqual(perfSection["cpu"]["TotalJobTime"], "23.5703",
                         "Error: TotalJobTime is wrong.")
        self.assertEqual(perfSection["storage"]["readTotalMB"], 39.6166,
                         "Error: readTotalMB is wrong.")
        self.assertEqual(perfSection["storage"]["readMaxMSec"], 320.653,
                         "Error: readMaxMSec is wrong")
        self.assertEqual(perfSection["memory"]["PeakValueRss"], "492.293",
                         "Error: PeakValueRss is wrong.")
        self.assertEqual(perfSection["memory"]["PeakValueVsize"], "643.281",
                         "Error: PeakValueVsize is wrong.")
        return

    def testExitCode(self):
        """
        _testExitCode_

        Test and see if we can get an exit code out of a report

        Note: Errors without a return code return 99999

        getStepExitCode: returns the first valid and non-zero exit code
        getExitCode: uses the method above to get an exit code
        getStepExitCodes: returns a set of all exit codes within the step
        """
        report = Report("cmsRun1")
        # Fresh report: no errors, exit code 0, empty error/exit-code sets.
        self.assertEqual(report.getExitCode(), 0)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 0)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"), {})
        self.assertItemsEqual(report.getStepErrors(stepName="cmsRun1"), {})

        report.addError(stepName="cmsRun1", exitCode=None, errorType="test",
                        errorDetails="test")
        # None is not a valid exitCode, but it will get mapped to 99999
        self.assertEqual(report.getExitCode(), 99999)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 99999)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"),
                              {99999})
        self.assertEqual(
            report.getStepErrors(stepName="cmsRun1")['errorCount'], 1)

        # The first non-99999 exit code recorded becomes THE exit code and
        # stays sticky even when further errors are added.
        report.addError(stepName="cmsRun1", exitCode=12345, errorType="test",
                        errorDetails="test")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 12345)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"),
                              {99999, 12345})
        self.assertEqual(
            report.getStepErrors(stepName="cmsRun1")['errorCount'], 2)

        report.addError(stepName="cmsRun1", exitCode=123, errorType="test",
                        errorDetails="test")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 12345)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"),
                              {99999, 12345, 123})
        self.assertEqual(
            report.getStepErrors(stepName="cmsRun1")['errorCount'], 3)

        # now try to record the same exit code once again
        # (duplicate exit codes are not double-counted)
        report.addError(stepName="cmsRun1", exitCode=12345, errorType="test",
                        errorDetails="test")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 12345)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"),
                              {99999, 12345, 123})
        self.assertEqual(
            report.getStepErrors(stepName="cmsRun1")['errorCount'], 3)

        # and once again, but different type and details (which does not matter)
        report.addError(stepName="cmsRun1", exitCode=12345, errorType="testAA",
                        errorDetails="testAA")
        self.assertEqual(report.getExitCode(), 12345)
        self.assertEqual(report.getStepExitCode(stepName="cmsRun1"), 12345)
        self.assertItemsEqual(report.getStepExitCodes(stepName="cmsRun1"),
                              {99999, 12345, 123})
        self.assertEqual(
            report.getStepErrors(stepName="cmsRun1")['errorCount'], 3)

    def testProperties(self):
        """
        _testProperties_

        Test data fields for the properties information for DBS
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        name = "ThisIsASillyString"

        # Setting these at report level should propagate to every file.
        myReport.setValidStatus(name)
        myReport.setGlobalTag(name)
        myReport.setAcquisitionProcessing(acquisitionEra='NULL',
                                          processingVer=name)
        myReport.setInputDataset(inputPath='/lame/path')

        for f in myReport.getAllFilesFromStep("cmsRun1"):
            self.assertEqual(f['globalTag'], name)
            self.assertEqual(f['validStatus'], name)
            self.assertEqual(f['processingVer'], name)
            self.assertEqual(f['acquisitionEra'], 'NULL')
            self.assertEqual(f['inputPath'], '/lame/path')
        return

    def testOutputFiles(self):
        """
        _testOutputFiles_

        Test some basic manipulation of output files
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        files = myReport.getAllFilesFromStep(step="cmsRun1")

        # The fixture produces exactly two output files, in module order:
        # RECO first, then ALCARECO.
        f1 = files[0]
        f2 = files[1]

        self.assertEqual(f1['outputModule'], 'outputRECORECO')
        self.assertEqual(f1['pfn'], 'outputRECORECO.root')

        self.assertEqual(f2['outputModule'], 'outputALCARECORECO')
        self.assertEqual(f2['pfn'], 'outputALCARECORECO.root')

        for f in files:
            self.assertEqual(f['events'], 2)
            self.assertEqual(f['configURL'], None)
            self.assertEqual(f['merged'], False)
            self.assertEqual(f['validStatus'], None)
            self.assertEqual(f['first_event'], 0)
        return

    def testGetAdlerChecksum(self):
        """
        _testGetAdlerChecksum_

        Test the function that sees if all files have an adler checksum.

        For some reason, our default XML report doesn't have checksums
        Therefore it should fail.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # Default fixture has no checksums -> step fails with 60451.
        myReport.checkForAdlerChecksum(stepName="cmsRun1")

        self.assertFalse(myReport.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport.getExitCode(), 60451)

        # Now see what happens if the adler32 is set to None
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': None}
        myReport2.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 60451)

        # A real (non-None) adler32 value on every file passes the check.
        myReport3 = Report("cmsRun1")
        myReport3.parse(self.xmlPath)
        fRefs = myReport3.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            fRef.checksums = {'adler32': 100}
        myReport3.checkForAdlerChecksum(stepName="cmsRun1")
        self.assertTrue(myReport3.getExitCode() != 60451)
        return

    def testCheckLumiInformation(self):
        """
        _testCheckLumiInformation_

        Test the function that checks if all files have run lumi information
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # Default fixture has run/lumi info -> no 70452 error.
        myReport.checkForRunLumiInformation(stepName="cmsRun1")
        self.assertNotEqual(myReport.getExitCode(), 70452)

        # Remove the lumi information on purpose
        myReport2 = Report("cmsRun1")
        myReport2.parse(self.xmlPath)
        fRefs = myReport2.getAllFileRefsFromStep(step="cmsRun1")
        for fRef in fRefs:
            # Replacing runs with an empty ConfigSection wipes run/lumi data.
            fRef.runs = ConfigSection()
        myReport2.checkForRunLumiInformation(stepName="cmsRun1")
        self.assertFalse(myReport2.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(myReport2.getExitCode(), 70452)
        return

    def testTaskSuccessful(self):
        """
        _testTaskSuccessful_

        Test whether or not the report marks the task successful
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        # First, the report should fail
        self.assertFalse(myReport.taskSuccessful())

        # Second, if we ignore cmsRun, the task
        # should succeed
        self.assertTrue(myReport.taskSuccessful(ignoreString='cmsRun'))
        return

    def testStripReport(self):
        """
        _testStripReport_

        Test whether or not we can strip input file information
        from a FWJR and create a smaller object.
        """
        myReport = Report("cmsRun1")
        myReport.parse(self.xmlPath)

        path1 = os.path.join(self.testDir, 'testReport1.pkl')
        path2 = os.path.join(self.testDir, 'testReport2.pkl')

        # Size on disk before stripping input file info.
        myReport.save(path1)
        info = FileTools.getFileInfo(filename=path1)
        sizeBefore = info['Size']

        inputFiles = myReport.getAllInputFiles()
        self.assertEqual(len(inputFiles), 1)
        myReport.stripInputFiles()
        self.assertEqual(len(myReport.getAllInputFiles()), 0)

        # Stripping must shrink the persisted report.
        myReport.save(path2)
        info = FileTools.getFileInfo(filename=path2)
        sizeAfter = info['Size']

        self.assertGreater(sizeBefore, sizeAfter)
        return

    # NOTE(review): method name is missing an 'e' ("Duplicat") — presumably a
    # typo for testDuplicateStep; left as-is since renaming changes which test
    # IDs runners report.
    def testDuplicatStep(self):
        """
        _testDuplicateStep_

        If the same step is added twice, it should act as a replacement,
        and raise an appropriate message
        """
        baseReport = Report("cmsRun1")
        baseReport.parse(self.xmlPath)

        modReport = Report("cmsRun1")
        modReport.parse(self.xmlPath)
        # Marker attribute so we can tell which copy of the step survived.
        setattr(modReport.data.cmsRun1, 'testVar', 'test01')

        report = Report()
        report.setStep(stepName='cmsRun1',
                       stepSection=baseReport.retrieveStep('cmsRun1'))
        report.setStep(stepName='cmsRun1',
                       stepSection=modReport.retrieveStep('cmsRun1'))

        # The second setStep replaces the first: one step, carrying the marker.
        self.assertEqual(report.listSteps(), ['cmsRun1'])
        self.assertEqual(report.data.cmsRun1.testVar, 'test01')
        return

    def testDeleteOutputModule(self):
        """
        _testDeleteOutputModule_

        If asked delete an output module, if it doesn't exist
        then do nothing
        """
        originalReport = Report("cmsRun1")
        originalReport.parse(self.xmlPath)

        # Sanity check: the module must exist before we try to delete it.
        self.assertTrue(
            originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
            "Error: Report XML doesn't have the module for the test, invalid test"
        )

        originalOutputModules = len(
            originalReport.retrieveStep("cmsRun1").outputModules)

        originalReport.deleteOutputModuleForStep("cmsRun1",
                                                 "outputALCARECORECO")

        self.assertFalse(
            originalReport.getOutputModule("cmsRun1", "outputALCARECORECO"),
            "Error: The output module persists after deletion")

        self.assertEqual(
            len(originalReport.retrieveStep("cmsRun1").outputModules),
            originalOutputModules - 1,
            "Error: The number of output modules is incorrect after deletion")

    def testSkippedFiles(self):
        """
        _testSkippedFiles_

        Test that skipped files are translated from FWJR into report
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        self.assertEqual(originalReport.getAllSkippedFiles(), [
            '/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A611-E211-A851-0019B9F581C9.root'
        ])

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        self.assertEqual(goodReport.getAllSkippedFiles(), [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        self.assertEqual(sorted(badReport.getAllSkippedFiles()), [
            '/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A611-E211-A851-0019B9F581C9.root',
            '/store/data/Run2012D/Cosmics/RAW/v1/000/206/379/1ED243E7-A622-E211-A851-0019B9F581C.root'
        ])
        return

    def testSkippedFilesJSON(self):
        """
        _testSkippedFilesJSON_

        Test that skipped files are translated properly into JSON
        """
        # Check a report where some files were skipped but not all
        originalReport = Report("cmsRun1")
        originalReport.parse(self.skippedFilesxmlPath)
        originalJSON = originalReport.__to_json__(None)
        self.assertEqual(len(originalJSON['skippedFiles']), 1)

        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['skippedFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['skippedFiles']), 2)
        return

    def testFallbackFiles(self):
        """
        _testFallbackFiles_

        Test that fallback files end up
        in the report
        """
        # For negative control, check a good report with no fallback files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        self.assertEqual(goodReport.getAllFallbackFiles(), [])

        # Check a report where the file was a fallback
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        self.assertEqual(sorted(badReport.getAllFallbackFiles()), [
            '/store/data/Run2012D/SingleElectron/AOD/PromptReco-v1/000/207/279/D43A5B72-1831-E211-895D-001D09F24763.root'
        ])

        # Two-file remote-read fixture: both files count as fallback.
        twoReport = Report("cmsRun1")
        twoReport.parse(self.twoFileFallbackXmlPath)
        self.assertEqual(len(twoReport.getAllFallbackFiles()), 2)
        return

    def testPileupFiles(self):
        """
        _testPileupFiles_

        Test that alll the pileup files end up in the report
        """
        report = Report("cmsRun1")
        report.parse(self.pileupXmlPath)

        # 14 inputs total: 1 primary + 13 mixing (pileup), no secondary.
        self.assertEqual(len(report.getAllInputFiles()), 14)

        primaryCount = 0
        secondaryCount = 0
        mixingCount = 0

        for fileEntry in report.getAllInputFiles():
            if fileEntry['input_type'] == 'mixingFiles':
                mixingCount += 1
            elif fileEntry['input_type'] == 'primaryFiles':
                primaryCount += 1
            elif fileEntry['input_type'] == 'secondaryFiles':
                secondaryCount += 1

        self.assertEqual(primaryCount, 1)
        self.assertEqual(secondaryCount, 0)
        self.assertEqual(mixingCount, 13)
        self.assertEqual(len(report.getAllFallbackFiles()), 1)
        return

    def testFallbackFilesJSON(self):
        """
        _testFallbackFilesJSON_

        Test that fallback attempt files are translated properly into JSON
        """
        # For negative control, check a good report with no skipped files
        goodReport = Report("cmsRun1")
        goodReport.parse(self.xmlPath)
        goodJSON = goodReport.__to_json__(None)
        self.assertEqual(goodJSON['fallbackFiles'], [])

        # Check a report where all files were skipped
        badReport = Report("cmsRun1")
        badReport.parse(self.fallbackXmlPath)
        badJSON = badReport.__to_json__(None)
        self.assertEqual(len(badJSON['fallbackFiles']), 1)
        return

    def testOutputCheck(self):
        """
        _testOutputCheck_

        Check that we can identify bad reports with no output files
        """
        # All-skipped fixture produces no output -> check fails with 60450.
        badReport = Report("cmsRun1")
        badReport.parse(self.skippedAllFilesxmlPath)
        badReport.checkForOutputFiles("cmsRun1")
        self.assertFalse(badReport.stepSuccessful(stepName="cmsRun1"))
        self.assertEqual(badReport.getExitCode(), 60450)
        return
class MonteCarloTest(unittest.TestCase):
    """Unit tests for the MonteCarlo workload spec and its injection into WMBS."""

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("montecarlo_t", "ConfigCache")
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        self.testInit.generateWorkDir()
        couchServer = CouchServer(os.environ["COUCHURL"])
        # NOTE(review): the couch app is set up as "montecarlo_t" above, yet
        # the config documents are committed to "rereco_t" -- confirm this
        # mismatch is intended.
        self.configDatabase = couchServer.connectDatabase("rereco_t")
        EmulatorHelper.setEmulators(dbs = True)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        EmulatorHelper.resetEmulators()
        return

    def injectMonteCarloConfig(self):
        """
        _injectMonteCarlo_

        Create a bogus config cache document for the montecarlo generation and
        inject it into couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        # Two output modules so merge/cleanup tasks get created for each.
        newConfig["pset_tweak_details"] = {"process": {"outputModules_": ["OutputA", "OutputB"],
                                                       "OutputA": {"dataset": {"filterName": "OutputAFilter",
                                                                               "dataTier": "RECO"}},
                                                       "OutputB": {"dataset": {"filterName": "OutputBFilter",
                                                                               "dataTier": "USER"}}}}
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]

    def _commonMonteCarloTest(self):
        """
        Retrieve the workload from WMBS and test all its properties.
        """
        prodWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production")
        prodWorkflow.load()
        # Two output modules plus the logArchive output.
        self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        # The production task archives its own logs unmerged.
        logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Each merge task has a Merged output plus its own logArchive.
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")
            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")
            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Top-level subscription: Production type, event-based splitting.
        topLevelFileset = Fileset(name = "TestWorkload-Production-SomeBlock")
        topLevelFileset.loadData()
        prodSubscription = Subscription(fileset = topLevelFileset, workflow = prodWorkflow)
        prodSubscription.loadData()
        self.assertEqual(prodSubscription["type"], "Production",
                         "Error: Wrong subscription type.")
        self.assertEqual(prodSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        # Merge subscriptions over each unmerged output fileset.
        for outputName in ["OutputA", "OutputB"]:
            unmergedOutput = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmergedOutput.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Production/ProductionMerge%s" % outputName)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedOutput, workflow = mergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions remove unmerged files once they are merged.
        for outputName in ["OutputA", "OutputB"]:
            unmerged = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        # Log-collect subscription for the production task's logs.
        procLogCollect = Fileset(name = "/TestWorkload/Production/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Production/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        # Log-collect subscriptions for each merge task's logs.
        for outputName in ["OutputA", "OutputB"]:
            mergeLogCollect = Fileset(name = "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
            mergeLogCollect.loadData()
            mergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (outputName, outputName))
            mergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = mergeLogCollect, workflow = mergeLogCollectWorkflow)
            logCollectSub.loadData()
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

    def testMonteCarlo(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                    cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # All WMBS-level checks live in the shared helper.
        self._commonMonteCarloTest()
        return

    def testMonteCarloExtension(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.  This uses a non-zero first
        event and lumi.  Check that the splitting arguments are correctly set
        for the lfn counter.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()
        # Simulate an extension request starting beyond an earlier run.
        defaultArguments["FirstEvent"] = 3571428573
        defaultArguments["FirstLumi"] = 26042
        defaultArguments["TimePerEvent"] = 15
        defaultArguments["FilterEfficiency"] = 0.014
        defaultArguments["TotalTime"] = 28800

        initial_lfn_counter = 26042  # Same as the previous number of jobs + 1 which is the same value of the first lumi

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                    cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

        # The lfn counter must propagate to the production and merge tasks.
        productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
        productionSplitting = productionTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in productionSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter,
                         "Wrong initial LFN counter")

        for outputMod in ["OutputA", "OutputB"]:
            mergeTask = testWorkload.getTaskByPath('/TestWorkload/Production/ProductionMerge%s' % outputMod)
            mergeSplitting = mergeTask.jobSplittingParameters()
            self.assertTrue("initial_lfn_counter" in mergeSplitting,
                            "No initial lfn counter was stored")
            self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter,
                             "Wrong initial LFN counter")
        return

    def testRelValMCWithPileup(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.  The input configuration
        includes pileup input files.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()

        # add pile up configuration
        defaultArguments["PileupConfig"] = {"mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
                                            "data": ["/some/minbias/dataset3"]}

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock",
                                    cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()
        return
class DQMHarvestTests(unittest.TestCase):
    """
    _DQMHarvestTests_

    Tests the DQMHarvest spec file
    """

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("dqmharvest_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("dqmharvest_t")
        self.testInit.generateWorkDir()
        self.workload = None
        # JSON request template used to seed the workload arguments.
        self.jsonTemplate = getTestFile('data/ReqMgr/requests/DMWM/DQMHarvesting.json')
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def injectDQMHarvestConfig(self):
        """
        _injectDQMHarvest_

        Create a bogus config cache document for DQMHarvest and inject it into
        couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e234f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
        newConfig["owner"] = {"group": "DATAOPS", "user": "******"}
        newConfig["pset_tweak_details"] = {"process": {"outputModules_": []}}
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]

    def _loadRequestArgs(self, testArguments):
        """Read the JSON request template and fold it into testArguments.

        Uses a context manager so the template file handle is closed promptly
        (the previous json.load(open(...)) leaked the handle).
        """
        with open(self.jsonTemplate) as requestFile:
            request = json.load(requestFile)
        testArguments.update(request['createRequest'])
        return testArguments

    def testDQMHarvest(self):
        """
        Build a DQMHarvest workload
        """
        testArguments = DQMHarvestWorkloadFactory.getTestArguments()
        # Read in the request
        self._loadRequestArgs(testArguments)
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "dqmharvest_t",
            "DQMConfigCacheID": self.injectDQMHarvestConfig()
        })
        # Harvesting uses DQMConfigCacheID instead of ConfigCacheID.
        testArguments.pop("ConfigCacheID", None)

        factory = DQMHarvestWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test workload properties
        self.assertEqual(testWorkload.getDashboardActivity(), "harvesting")
        self.assertEqual(testWorkload.getCampaign(), "Campaign-OVERRIDE-ME")
        self.assertEqual(testWorkload.getAcquisitionEra(), "CMSSW_7_3_1_patch1")
        self.assertEqual(testWorkload.getProcessingString(), "GR_R_73_V0A_TEST_RelVal_jetHT2012c")
        self.assertEqual(testWorkload.getProcessingVersion(), 1)
        self.assertFalse(testWorkload.getPrepID(), "PrepId does not match")
        self.assertEqual(testWorkload.getCMSSWVersions(), ['CMSSW_7_3_1_patch1'])

        # test workload attributes
        self.assertEqual(testWorkload.processingString, "GR_R_73_V0A_TEST_RelVal_jetHT2012c")
        self.assertEqual(testWorkload.acquisitionEra, "CMSSW_7_3_1_patch1")
        self.assertEqual(testWorkload.processingVersion, 1)
        self.assertEqual(sorted(testWorkload.lumiList.keys()),
                         ['139788', '139790', '144011'])
        self.assertEqual(sorted(testWorkload.lumiList.values()),
                         [[[5, 10], [15, 20], [25, 30]],
                          [[25, 75], [125, 175], [275, 325]],
                          [[50, 100], [110, 125]]])
        self.assertEqual(testWorkload.data.policies.start.policyName, "DatasetBlock")

        # test workload tasks and steps
        tasks = testWorkload.listAllTaskNames()
        self.assertEqual(len(tasks), 2)
        self.assertEqual(sorted(tasks),
                         ['EndOfRunDQMHarvest', 'EndOfRunDQMHarvestLogCollect'])

        # tasks[0] is the harvest task; verify its type, splitting and steps.
        task = testWorkload.getTask(tasks[0])
        self.assertEqual(task.name(), "EndOfRunDQMHarvest")
        self.assertEqual(task.getPathName(), "/TestWorkload/EndOfRunDQMHarvest")
        self.assertEqual(task.taskType(), "Harvesting", "Wrong task type")
        self.assertEqual(task.jobSplittingAlgorithm(), "Harvest", "Wrong job splitting algo")
        self.assertFalse(task.inputLocationFlag(), "Wrong input location flag")
        self.assertEqual(sorted(task.inputRunWhitelist()),
                         [138923, 138924, 138934, 138937, 139788, 139789,
                          139790, 144011, 144083, 144084, 144086])

        self.assertEqual(sorted(task.listAllStepNames()), ['cmsRun1', 'logArch1', 'upload1'])
        self.assertEqual(task.getStep("cmsRun1").stepType(), "CMSSW")
        self.assertEqual(task.getStep("logArch1").stepType(), "LogArchive")
        self.assertEqual(task.getStep("upload1").stepType(), "DQMUpload")
        return

    def testDQMHarvestFailed(self):
        """
        Build a DQMHarvest workload without a DQM config doc
        """
        testArguments = DQMHarvestWorkloadFactory.getTestArguments()
        # Read in the request
        self._loadRequestArgs(testArguments)
        testArguments.update({
            "CouchURL": os.environ["COUCHURL"],
            "ConfigCacheUrl": os.environ["COUCHURL"],
            "CouchDBName": "dqmharvest_t",
            "ConfigCacheID": self.injectDQMHarvestConfig()
        })
        # Removing the mandatory DQMConfigCacheID must fail schema validation.
        testArguments.pop("DQMConfigCacheID", None)

        factory = DQMHarvestWorkloadFactory()
        self.assertRaises(WMSpecFactoryException, factory.validateSchema, testArguments)
        return
class PromptRecoTest(unittest.TestCase):
    """Unit tests for the PromptReco (Tier-0) workload spec and its WMBS injection."""

    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("promptreco_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("promptreco_t")
        self.testDir = self.testInit.generateWorkDir()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        # DAOs used when verifying workflows/filesets/subscriptions in WMBS.
        self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
        self.listFilesets = self.daoFactory(classname="Fileset.List")
        self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.tearDownCouch()
        self.testInit.clearDatabase()
        self.testInit.delWorkDir()
        return

    def setupPromptSkimConfigObject(self):
        """
        _setupPromptSkimConfigObject_

        Creates a custom config object for testing of the skim functionality
        """
        self.promptSkim = ConfigSection(name="Tier1Skim")
        self.promptSkim.SkimName = "TestSkim1"
        self.promptSkim.DataTier = "RECO"
        self.promptSkim.TwoFileRead = False
        self.promptSkim.ProcessingVersion = "PromptSkim-v1"
        self.promptSkim.ConfigURL = "http://cmssw.cvs.cern.ch/cgi-bin/cmssw.cgi/CMSSW/Configuration/DataOps/python/prescaleskimmer.py?revision=1.1"

    #    def testPromptReco(self):
    #        """
    #        _testPromptReco_
    #
    #        Create a Prompt Reconstruction workflow
    #        and verify it installs into WMBS correctly.
    #        """

    def testPromptRecoWithSkims(self):
        """
        _testT1PromptRecoWithSkim_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        # One output per write tier plus the logArchive output.
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO",
                            "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            # ALCARECO is skimmed (not merged directly), so its merged-name
            # check is skipped.
            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # The AlcaSkim task produces one output stream per configured skim.
        alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                         "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            fset = goldenOutputMod + "ALCARECO"
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # DQM harvesting hangs off the merged DQM output.
        dqmWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Merge workflows for the directly-merged tiers (no ALCARECO here).
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")
            mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Unmerged output fileset is wrong.")
            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Merge workflows for each ALCARECO skim stream.
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()
            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
            mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()
            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")
            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()
            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        # Top-level subscription: Processing, event-aware lumi splitting.
        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()
        recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
        recoSubscription.loadData()
        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        # AlcaSkim consumes the unmerged ALCARECO output of Reco.
        alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
        alcaRecoFileset.loadData()
        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()
        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        # Harvesting consumes the merged DQM output.
        mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
        mergedDQMFileset.loadData()
        dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
        dqmSubscription.loadData()
        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        # Merge subscriptions for each directly-merged tier.
        unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Merge subscriptions for each ALCARECO skim stream.
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                             task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()
            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        # Cleanup subscriptions for the Reco unmerged outputs.
        goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            # NOTE(review): the message below says "subscription type" but the
            # assertion checks split_algo -- misleading on failure.
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        # Cleanup subscriptions for the AlcaSkim unmerged outputs.
        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name="TestWorkload",
                                       task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            # NOTE(review): same misleading failure message as above.
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        # Log-collect subscription for the Reco task's logs.
        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        # Log-collect subscription for the AlcaSkim task's logs.
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                   task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                       goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                                  workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()
            # NOTE(review): these assertions check ``logCollectSub`` -- the
            # stale variable from the earlier AlcaSkim block -- instead of the
            # ``logCollectSubscription`` just loaded above.  Likely a bug: the
            # per-merge-task subscriptions are never actually verified here.
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                                  task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                      goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                                  workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()
            # NOTE(review): same stale-variable issue as the loop above --
            # ``logCollectSub`` is asserted instead of ``logCollectSubscription``.
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        # Log-collect subscription for the DQM harvest task's logs.
        dqmHarvestLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                                task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
        return

    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory settings are properly propagated to
        all tasks and steps.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnableHarvesting"] = True

        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)

        # test default values
        taskPaths = ['/TestWorkload/Reco', '/TestWorkload/Reco/AlcaSkim']
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
                self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        testArguments["Multicore"] = 6
        testArguments["Memory"] = 4600.0
        testArguments["EventStreams"] = 3
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in ('cmsRun1', 'stageOut1', 'logArch1'):
                stepHelper = taskObj.getStepHelper(step)
                # Only the Reco cmsRun step gets the multicore/streams values;
                # everything else stays single-core.
                if task == '/TestWorkload/Reco' and step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), testArguments["Multicore"])
                    self.assertEqual(stepHelper.getNumberOfStreams(), testArguments["EventStreams"])
                elif step in ('stageOut1', 'logArch1'):
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task)
                    self.assertEqual(stepHelper.getNumberOfStreams(), 0)
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], testArguments["Memory"])
        return

    def testFilesets(self):
        """
        Test workflow tasks, filesets and subscriptions creation
        """
        # expected tasks, filesets, subscriptions, etc
        expOutTasks = ['/TestWorkload/Reco',
                       '/TestWorkload/Reco/AlcaSkim',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                       '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                       '/TestWorkload/Reco/RecoMergewrite_AOD',
                       '/TestWorkload/Reco/RecoMergewrite_DQM',
                       '/TestWorkload/Reco/RecoMergewrite_RECO']
        expWfTasks = ['/TestWorkload/Reco',
                      '/TestWorkload/Reco/AlcaSkim',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect',
                      '/TestWorkload/Reco/LogCollect',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM',
                      '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO',
                      '/TestWorkload/Reco/RecoMergewrite_AOD',
                      '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect',
                      '/TestWorkload/Reco/RecoMergewrite_DQM',
                      '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged',
'/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect', '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect', '/TestWorkload/Reco/RecoMergewrite_RECO', '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect'] expFsets = ['TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-MergedALCARECO', '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO', '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-MergedALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-MergedALCARECO', '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO', '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO', '/TestWorkload/Reco/AlcaSkim/unmerged-logArchive', '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_AOD/merged-MergedAOD', '/TestWorkload/Reco/unmerged-write_AODAOD', '/TestWorkload/Reco/unmerged-write_DQMDQM', '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM', '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive', '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_RECO/merged-MergedRECO', '/TestWorkload/Reco/unmerged-logArchive', '/TestWorkload/Reco/unmerged-write_RECORECO'] subMaps = [(4, 
'/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics/AlcaSkimALCARECOStreamHcalCalHOCosmicsMergeLogCollect', 'MinFileBased', 'LogCollect'), (10, '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics/AlcaSkimALCARECOStreamMuAlGlobalCosmicsMergeLogCollect', 'MinFileBased', 'LogCollect'), (7, '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/merged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T/AlcaSkimALCARECOStreamTkAlCosmics0TMergeLogCollect', 'MinFileBased', 'LogCollect'), (5, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamHcalCalHOCosmics', 'SiblingProcessingBased', 'Cleanup'), (3, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamHcalCalHOCosmicsALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics', 'ParentlessMergeBySize', 'Merge'), (11, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamMuAlGlobalCosmics', 'SiblingProcessingBased', 'Cleanup'), (9, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamMuAlGlobalCosmicsALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics', 'ParentlessMergeBySize', 'Merge'), (8, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmergedALCARECOStreamTkAlCosmics0T', 'SiblingProcessingBased', 'Cleanup'), (6, '/TestWorkload/Reco/AlcaSkim/unmerged-ALCARECOStreamTkAlCosmics0TALCARECO', '/TestWorkload/Reco/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T', 'ParentlessMergeBySize', 'Merge'), (12, 
'/TestWorkload/Reco/AlcaSkim/unmerged-logArchive', '/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect', 'MinFileBased', 'LogCollect'), (15, '/TestWorkload/Reco/RecoMergewrite_AOD/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_AOD/Recowrite_AODMergeLogCollect', 'MinFileBased', 'LogCollect'), (20, '/TestWorkload/Reco/RecoMergewrite_DQM/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_DQM/Recowrite_DQMMergeLogCollect', 'MinFileBased', 'LogCollect'), (18, '/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM', '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged', 'Harvest', 'Harvesting'), (19, '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive', '/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect', 'MinFileBased', 'LogCollect'), (23, '/TestWorkload/Reco/RecoMergewrite_RECO/merged-logArchive', '/TestWorkload/Reco/RecoMergewrite_RECO/Recowrite_RECOMergeLogCollect', 'MinFileBased', 'LogCollect'), (25, '/TestWorkload/Reco/unmerged-logArchive', '/TestWorkload/Reco/LogCollect', 'MinFileBased', 'LogCollect'), (2, '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO', '/TestWorkload/Reco/AlcaSkim', 'ParentlessMergeBySize', 'Processing'), (13, '/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO', '/TestWorkload/Reco/RecoCleanupUnmergedwrite_ALCARECO', 'SiblingProcessingBased', 'Cleanup'), (16, '/TestWorkload/Reco/unmerged-write_AODAOD', '/TestWorkload/Reco/RecoCleanupUnmergedwrite_AOD', 'SiblingProcessingBased', 'Cleanup'), (14, '/TestWorkload/Reco/unmerged-write_AODAOD', '/TestWorkload/Reco/RecoMergewrite_AOD', 'ParentlessMergeBySize', 'Merge'), (21, '/TestWorkload/Reco/unmerged-write_DQMDQM', '/TestWorkload/Reco/RecoCleanupUnmergedwrite_DQM', 'SiblingProcessingBased', 'Cleanup'), (17, '/TestWorkload/Reco/unmerged-write_DQMDQM', '/TestWorkload/Reco/RecoMergewrite_DQM', 'ParentlessMergeBySize', 'Merge'), 
(24, '/TestWorkload/Reco/unmerged-write_RECORECO', '/TestWorkload/Reco/RecoCleanupUnmergedwrite_RECO', 'SiblingProcessingBased', 'Cleanup'), (22, '/TestWorkload/Reco/unmerged-write_RECORECO', '/TestWorkload/Reco/RecoMergewrite_RECO', 'ParentlessMergeBySize', 'Merge'), (1, 'TestWorkload-Reco-/MinimumBias/ComissioningHI-v1/RAW', '/TestWorkload/Reco', 'EventAwareLumiBased', 'Processing')] testArguments = PromptRecoWorkloadFactory.getTestArguments() testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["CouchDBName"] = "promptreco_t" testArguments["EnableHarvesting"] = True factory = PromptRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments) testWMBSHelper = WMBSHelper(testWorkload, "Reco", blockName=testArguments['InputDataset'], cachepath=self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) self.assertItemsEqual(testWorkload.listOutputProducingTasks(), expOutTasks) workflows = self.listTasksByWorkflow.execute(workflow="TestWorkload") self.assertItemsEqual([item['task'] for item in workflows], expWfTasks) # returns a tuple of id, name, open and last_update filesets = self.listFilesets.execute() self.assertItemsEqual([item[1] for item in filesets], expFsets) subscriptions = self.listSubsMapping.execute(workflow="TestWorkload", returnTuple=True) self.assertItemsEqual(subscriptions, subMaps)
class JobTrackerTest(unittest.TestCase):
    """
    TestCase for TestJobTracker module

    Exercises JobTrackerPoller against a BossAir backed by the Condor
    plugin, using a fresh WMBS/BossAir/ResourceControl schema per test.
    """

    # Kept for the test harness; not referenced inside this class.
    _maxMessage = 10

    def setUp(self):
        """
        setup for test.

        Creates the database schema, registers the 'malpaquet' test site in
        ResourceControl (with a CondorPlugin threshold), creates a test user,
        and generates a per-test work directory.
        """
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        #self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl"])
        self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl"],
                                useDefault = False)
        # self.testInit.setupCouch("jobtracker_t/jobs", "JobDump")
        # self.testInit.setupCouch("jobtracker_t/fwjrs", "FWJRDump")

        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        # DAO used throughout the tests to count jobs in a given state
        self.getJobs = self.daoFactory(classname = "Jobs.GetAllJobs")

        #Create sites in resourceControl
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'malpaquet', seName = 'se.malpaquet',
                                   ceName = 'malpaquet', plugin = "CondorPlugin")
        resourceControl.insertThreshold(siteName = 'malpaquet', taskType = 'Processing', \
                                        maxSlots = 10000, pendingSlots = 10000)

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "malpaquet", seName = "malpaquet",
                               ceName = "malpaquet", plugin = "CondorPlugin")

        # Create user
        newuser = self.daoFactory(classname = "Users.New")
        newuser.execute(dn = "jchurchill")

        # We actually need the user name
        self.user = getpass.getuser()

        self.testDir = self.testInit.generateWorkDir()

    def tearDown(self):
        """
        Database deletion

        Drops the schema created in setUp, removes the work directory and
        tears down any couch instances.
        """
        self.testInit.clearDatabase(modules = ["WMCore.WMBS", "WMCore.BossAir", "WMCore.ResourceControl"])
        self.testInit.delWorkDir()
        self.testInit.tearDownCouch()
        return

    def getConfig(self):
        """
        _getConfig_

        Build a basic JobTracker config

        Returns a Configuration with Agent, CoreDatabase, JobTracker,
        JobSubmitter, BossAir and JobStateMachine sections populated for
        the tests in this class.
        """
        config = Configuration()

        config.section_("Agent")
        config.Agent.agentName = 'testAgent'

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobTracker
        config.component_("JobTracker")
        config.JobTracker.logLevel = 'INFO'
        config.JobTracker.pollInterval = 10
        config.JobTracker.trackerName = 'CondorTracker'
        config.JobTracker.pluginDir = 'WMComponent.JobTracker.Plugins'
        config.JobTracker.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobTracker.runTimeLimit = 7776000 #Jobs expire after 90 days
        config.JobTracker.idleTimeLimit = 7776000
        config.JobTracker.heldTimeLimit = 7776000
        config.JobTracker.unknTimeLimit = 7776000

        config.component_("JobSubmitter")
        config.JobSubmitter.logLevel = 'INFO'
        config.JobSubmitter.maxThreads = 1
        config.JobSubmitter.pollInterval = 10
        config.JobSubmitter.pluginName = 'AirPlugin'
        config.JobSubmitter.pluginDir = 'JobSubmitter.Plugins'
        config.JobSubmitter.submitDir = os.path.join(self.testDir, 'submit')
        config.JobSubmitter.submitNode = os.getenv("HOSTNAME", 'badtest.fnal.gov')
        #config.JobSubmitter.submitScript = os.path.join(os.getcwd(), 'submit.sh')
        config.JobSubmitter.submitScript = os.path.join(WMCore.WMInit.getWMBASE(),
                                                        'test/python/WMComponent_t/JobSubmitter_t',
                                                        'submit.sh')
        config.JobSubmitter.componentDir = os.path.join(os.getcwd(), 'Components')
        config.JobSubmitter.workerThreads = 2
        config.JobSubmitter.jobsPerWorker = 200
        config.JobSubmitter.gLiteConf = os.path.join(os.getcwd(), 'config.cfg')

        # BossAir
        config.component_("BossAir")
        config.BossAir.pluginNames = ['TestPlugin', 'CondorPlugin']
        config.BossAir.pluginDir = 'WMCore.BossAir.Plugins'

        #JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', 'cmssrv52.fnal.gov:5984')
        config.JobStateMachine.couchDBName = "jobtracker_t"

        return config

    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs

        Builds a workflow/fileset/subscription/jobgroup chain in WMBS, one
        input file located at 'malpaquet', and nJobs jobs attached to that
        file with their caches pointed at cacheDir.  Returns the JobGroup.
        """
        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = "Processing",
                                        split_algo = "FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name = '%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup

    @attr('integration')
    def testA_CondorTest(self):
        """
        _CondorTest_

        Because I don't want this test to be submitter dependent:
        Create a dummy condor job.
        Submit a dummy condor job.
        Track it.
        Kill it.
        Exit

        Requires a local condor_q with no jobs for the current user;
        skipped outside integration runs.
        """
        myThread = threading.currentThread()

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

        nJobs = 10
        jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs = nJobs, cacheDir = cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # First iteration
        # There are no jobs in the tracker,
        # The tracker should register the jobs as missing
        # This should tell it that they've finished
        # So the tracker should send them onwards
        jobTracker.algorithm()

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state = 'complete', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Second iteration
        # Reset the jobs
        # This time submit them to the queue
        # The jobs should remain in holding
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        # Create a submit script
        createSubmitScript(submitDir)

        # Dummy package and sandbox files so BossAir has something to ship
        jobPackage = os.path.join(self.testDir, 'JobPackage.pkl')
        f = open(jobPackage, 'w')
        f.write(' ')
        f.close()

        sandbox = os.path.join(self.testDir, 'sandbox.box')
        f = open(sandbox, 'w')
        f.write(' ')
        f.close()

        for job in testJobGroup.jobs:
            job['plugin'] = 'CondorPlugin'
            job['userdn'] = 'jchurchill'
            job['custom'] = {'location': 'malpaquet'}
            job['cache_dir'] = self.testDir
            job['sandbox'] = sandbox
            job['packageDir'] = self.testDir

        info = {}
        info['packageDir'] = self.testDir
        info['index'] = 0
        info['sandbox'] = sandbox

        jobTracker.bossAir.submit(jobs = testJobGroup.jobs, info = info)

        time.sleep(1)

        # All jobs should be running
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Run the algorithm.  After this
        # all jobs should still be running
        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), 0)

        # Are jobs still in the condor_q
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, nJobs)

        # Then we're done
        jobTracker.bossAir.kill(jobs = testJobGroup.jobs)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        jobTracker.algorithm()

        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), 0)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nJobs)

        # This is optional if you want to look at what
        # files were actually created during running
        #if os.path.isdir('testDir'):
        #    shutil.rmtree('testDir')
        #shutil.copytree('%s' %self.testDir, os.path.join(os.getcwd(), 'testDir'))

        return

    @attr('integration')
    def testB_ReallyLongTest(self):
        """
        _ReallyLongTest_

        Run a really long test using the condor plugin

        NOTE(review): the unconditional return below deliberately disables
        this test; everything after it is dead code kept for manual profiling
        runs (cProfile/pstats of jobTracker.algorithm over 500 jobs).
        """
        return

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

        myThread = threading.currentThread()

        # This has to be run with an empty queue
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0, "User currently has %i running jobs. Test will not continue" % (nRunning))

        nJobs = 500
        jobCE = 'cmsosgce.fnal.gov/jobmanager-condor'

        # Create directories
        cacheDir = os.path.join(self.testDir, 'CacheDir')
        submitDir = os.path.join(self.testDir, 'SubmitDir')

        if not os.path.isdir(cacheDir):
            os.makedirs(cacheDir)
        if not os.path.isdir(submitDir):
            os.makedirs(submitDir)

        # Get config
        config = self.getConfig()

        # Get jobGroup
        testJobGroup = self.createTestJobs(nJobs = nJobs, cacheDir = cacheDir)

        # Propogate jobs
        changer = ChangeState(config)
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')

        jobTracker = JobTrackerPoller(config)
        jobTracker.setup()

        # Now create some jobs
        # (only half of them are actually submitted to condor;
        #  nJobs/2 is Python 2 integer division)
        for job in testJobGroup.jobs[:(nJobs/2)]:
            jdl = createJDL(id = job['id'], directory = submitDir, jobCE = jobCE)
            jdlFile = os.path.join(submitDir, 'condorJDL_%i.jdl' % (job['id']))
            handle = open(jdlFile, 'w')
            handle.writelines(jdl)
            handle.close()

            command = ["condor_submit", jdlFile]
            pipe = subprocess.Popen(command, stdout = subprocess.PIPE,
                                    stderr = subprocess.PIPE, shell = False)
            pipe.communicate()

        startTime = time.time()
        cProfile.runctx("jobTracker.algorithm()", globals(), locals(), filename = "testStats.stat")
        #jobTracker.algorithm()
        stopTime = time.time()

        # Are jobs in the right state?
        result = self.getJobs.execute(state = 'Executing', jobType = "Processing")
        self.assertEqual(len(result), nJobs/2)

        result = self.getJobs.execute(state = 'Complete', jobType = "Processing")
        self.assertEqual(len(result), nJobs/2)

        # Then we're done
        killList = [x['id'] for x in testJobGroup.jobs]
        jobTracker.killJobs(jobList = killList)

        # No jobs should be left
        nRunning = getCondorRunningJobs(self.user)
        self.assertEqual(nRunning, 0)

        print ("Process took %f seconds to process %i classAds" %((stopTime - startTime),
                                                                  nJobs/2))

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

    def testAlerts(self):
        """
        Tests only alerts triggered from JobTrackerPoller.
        """
        config = self.getConfig()
        jobTracker = JobTrackerPoller(config)
        jobTracker.sendAlert(6, msg = "test message")