def testLoad(self):
    """
    _testLoad_

    Create a test job with createTestJob(), load it back once by id and
    once by name, and verify that both loaded copies agree with the created
    job on id, name, jobgroup, couch_record and location.  Both loaded
    copies are also expected to have the outcome 'failure' and no framework
    job report ('fwjr' is None).
    """
    testJobA = self.createTestJob()
    testJobB = Job(id=testJobA["id"])
    testJobC = Job(name=testJobA["name"])
    testJobB.load()
    testJobC.load()

    # Compare field by field with unittest assertions instead of one
    # compound bare assert: bare asserts are stripped under "python -O" and
    # a compound condition does not tell which field mismatched.
    comparedFields = ("id", "name", "jobgroup", "couch_record", "location")
    for loadedBy, loadedJob in (("ID", testJobB), ("name", testJobC)):
        for field in comparedFields:
            self.assertEqual(
                testJobA[field], loadedJob[field],
                "ERROR: Load from %s didn't load everything correctly "
                "(field '%s')" % (loadedBy, field))

    self.assertEqual(testJobB['outcome'], 'failure')
    self.assertEqual(testJobC['outcome'], 'failure')
    self.assertEqual(testJobB['fwjr'], None)
    self.assertEqual(testJobC['fwjr'], None)

    return
def handleFailed(self, jobID, fwkJobReport):
    """
    _handleFailed_

    Account for a failed job.  The job is loaded, its outcome is set to
    'failure' and the framework job report is attached to it.  Every file
    reported by the 'logArch1' step is added to WMBS and associated with
    the job's output fileset and with each fileset returned by
    outputFilesetsForJob().  The job is then queued on listOfJobsToFail;
    it is not saved in this method.
    """
    failedJob = Job(id=jobID)
    failedJob.load()
    jobOutputFileset = failedJob.loadOutputID()
    failedJob["outcome"] = "failure"

    # The rest of the state transitions are faked here, as the rest of the
    # WMAgent job submission framework is not yet complete.
    failedJob["fwjr"] = fwkJobReport

    def daoArguments():
        # Fresh keyword arguments for each DAO call; the connection and
        # the transaction are looked up anew every time.
        return {"jobID": jobID,
                "conn": self.getDBConn(),
                "transaction": self.existingTransaction()}

    outputMap = self.getOutputMapAction.execute(**daoArguments())
    jobType = self.getJobTypeAction.execute(**daoArguments())

    for reportedFile in fwkJobReport.getAllFilesFromStep(step='logArch1'):
        storedFile = self.addFileToWMBS(jobType, reportedFile,
                                        failedJob["mask"],
                                        jobID=jobID,
                                        task=fwkJobReport.getTaskName())
        isMerged = reportedFile['merged']
        label = reportedFile["module_label"]

        if isMerged:
            self.mergedOutputFiles.append(storedFile)

        self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                  "fileset": jobOutputFileset})
        for targetFileset in self.outputFilesetsForJob(outputMap, isMerged,
                                                       label):
            self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                      "fileset": targetFileset})

    self._mapLocation(failedJob['fwjr'])
    self.listOfJobsToFail.append(failedJob)
    return
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Account for a successful job.  The job is loaded and marked with the
    outcome 'success', its mask is fetched and the framework job report is
    attached.  Every file in the report is added to WMBS and associated
    with the job's output fileset as well as with each fileset returned by
    outputFilesetsForJob().  The job is finally queued on listOfJobsToSave;
    it is not saved here.  fwkJobReportPath is accepted but not used.
    """
    finishedJob = Job(id=jobID)
    finishedJob.load()
    finishedJob["outcome"] = "success"
    finishedJob.getMask()
    jobOutputFileset = finishedJob.loadOutputID()
    finishedJob["fwjr"] = fwkJobReport

    def daoArguments():
        # Fresh keyword arguments for each DAO call; the connection and
        # the transaction are looked up anew every time.
        return {"jobID": jobID,
                "conn": self.getDBConn(),
                "transaction": self.existingTransaction()}

    outputMap = self.getOutputMapAction.execute(**daoArguments())
    jobType = self.getJobTypeAction.execute(**daoArguments())

    for reportedFile in fwkJobReport.getAllFiles():
        storedFile = self.addFileToWMBS(jobType, reportedFile,
                                        finishedJob["mask"],
                                        jobID=jobID,
                                        task=fwkJobReport.getTaskName())
        isMerged = reportedFile['merged']
        label = reportedFile["module_label"]

        if isMerged:
            self.mergedOutputFiles.append(storedFile)

        self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                  "fileset": jobOutputFileset})
        for targetFileset in self.outputFilesetsForJob(outputMap, isMerged,
                                                       label):
            self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                      "fileset": targetFileset})

    # The job is only queued for saving once every file went through okay.
    self._mapLocation(finishedJob['fwjr'])
    self.listOfJobsToSave.append(finishedJob)
    return
def handleNeedsASO(self, jobID, fwkJobReport, fwkJobReportPath=None,
                   asoOutputCount=0):
    """
    _handleNeedsASO_

    Called when the FWJR asks for ASO: the job goes through a set of ASO
    states before moving to success.  Here the job is only loaded and
    queued on listOfJobsNeedingASO; fwkJobReport, fwkJobReportPath and
    asoOutputCount are accepted but not used.
    """
    asoJob = Job(id=jobID)
    asoJob.load()
    self.listOfJobsNeedingASO.append(asoJob)
    return
def handleFailed(self, jobID, fwkJobReport):
    """
    _handleFailed_

    Account for a failed job.  The job is loaded, its outcome is set to
    'failure' and the framework job report is attached to it.  When the
    'logArch1' step reported any files, isTaskExistInFWJR() is called
    first; each of those files is then added to WMBS and associated with
    the job's output fileset and with each fileset returned by
    outputFilesetsForJob().  The job is then queued on listOfJobsToFail;
    it is not saved in this method.
    """
    failedJob = Job(id=jobID)
    failedJob.load()
    jobOutputFileset = failedJob.loadOutputID()
    failedJob["outcome"] = "failure"

    # The rest of the state transitions are faked here, as the rest of the
    # WMAgent job submission framework is not yet complete.
    failedJob["fwjr"] = fwkJobReport

    def daoArguments():
        # Fresh keyword arguments for each DAO call; the connection and
        # the transaction are looked up anew every time.
        return {"jobID": jobID,
                "conn": self.getDBConn(),
                "transaction": self.existingTransaction()}

    outputMap = self.getOutputMapAction.execute(**daoArguments())
    jobType = self.getJobTypeAction.execute(**daoArguments())

    reportedFiles = fwkJobReport.getAllFilesFromStep(step='logArch1')
    if len(reportedFiles) > 0:
        # Task name info is needed to proceed with the files below.
        self.isTaskExistInFWJR(fwkJobReport, "failed")

    for reportedFile in reportedFiles:
        storedFile = self.addFileToWMBS(jobType, reportedFile,
                                        failedJob["mask"],
                                        jobID=jobID,
                                        task=fwkJobReport.getTaskName())
        isMerged = reportedFile['merged']
        label = reportedFile["module_label"]

        if isMerged:
            self.mergedOutputFiles.append(storedFile)

        self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                  "fileset": jobOutputFileset})
        for targetFileset in self.outputFilesetsForJob(outputMap, isMerged,
                                                       label):
            self.filesetAssoc.append({"lfn": storedFile["lfn"],
                                      "fileset": targetFileset})

    self._mapLocation(failedJob['fwjr'])
    self.listOfJobsToFail.append(failedJob)
    return
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Handle a successful job, parsing the job report and updating the job
    in WMBS.

    The job is loaded, marked with the outcome 'success' and given the
    framework job report.  Each file in the report is added to WMBS and
    associated with the job's output fileset(s).  For jobs of type
    "LogCollect" the file is first associated with the parent jobs in
    WMStats; if that raises, the error is logged, bookKeepingSuccess is set
    to False and processing of the remaining files stops.

    fwkJobReportPath is accepted but not used in the visible code.
    """
    wmbsJob = Job(id=jobID)
    wmbsJob.load()
    wmbsJob["outcome"] = "success"
    wmbsJob.getMask()
    outputID = wmbsJob.loadOutputID()
    wmbsJob["fwjr"] = fwkJobReport

    outputMap = self.getOutputMapAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())
    jobType = self.getJobTypeAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    fileList = fwkJobReport.getAllFiles()

    # NOTE(review): bookKeepingSuccess is never read in the visible part of
    # this method; the tail of the method appears to be missing from this
    # excerpt -- confirm against the full source.
    bookKeepingSuccess = True

    for fwjrFile in fileList:
        # Associate the log-archive file with the parent jobs in WMStats.
        # This assumes that fileList has length 1.
        if jobType == "LogCollect":
            try:
                self.associateLogCollectToParentJobsInWMStats(
                    fwkJobReport, fwjrFile["lfn"],
                    fwkJobReport.getTaskName())
            except Exception as ex:
                # "except X as name" is valid on Python 2.6+ and Python 3;
                # the former "except X, name" form is Python 2 only.
                bookKeepingSuccess = False
                logging.error("Error occurred: associating log collect location, will try again\n %s" % str(ex))
                break

        wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"],
                                      jobID=jobID,
                                      task=fwkJobReport.getTaskName())
        merged = fwjrFile['merged']
        moduleLabel = fwjrFile["module_label"]

        if merged:
            self.mergedOutputFiles.append(wmbsFile)

        self.filesetAssoc.append({"lfn": wmbsFile["lfn"],
                                  "fileset": outputID})
        outputFilesets = self.outputFilesetsForJob(outputMap, merged,
                                                   moduleLabel)
        for outputFileset in outputFilesets:
            self.filesetAssoc.append({"lfn": wmbsFile["lfn"],
                                      "fileset": outputFileset})
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Account for a successful job.  The job is loaded and marked with the
    outcome 'success', its mask is fetched and the framework job report is
    attached.  Every file in the report is added to WMBS and associated
    with the job's output fileset as well as with each fileset returned by
    outputFilesetsForJob().  The job is finally queued on listOfJobsToSave;
    it is not saved here.  fwkJobReportPath is accepted but not used.
    """
    doneJob = Job(id=jobID)
    doneJob.load()
    doneJob["outcome"] = "success"
    doneJob.getMask()
    ownFileset = doneJob.loadOutputID()
    doneJob["fwjr"] = fwkJobReport

    def lookupArguments():
        # Keyword arguments shared by both DAO lookups; the connection and
        # the transaction are requested again for every call.
        return {"jobID": jobID,
                "conn": self.getDBConn(),
                "transaction": self.existingTransaction()}

    outputMap = self.getOutputMapAction.execute(**lookupArguments())
    jobType = self.getJobTypeAction.execute(**lookupArguments())

    for reportFile in fwkJobReport.getAllFiles():
        wmbsEntry = self.addFileToWMBS(jobType, reportFile, doneJob["mask"],
                                       jobID=jobID,
                                       task=fwkJobReport.getTaskName())
        mergedFlag = reportFile['merged']
        outputModule = reportFile["module_label"]

        if mergedFlag:
            self.mergedOutputFiles.append(wmbsEntry)

        self.filesetAssoc.append({"lfn": wmbsEntry["lfn"],
                                  "fileset": ownFileset})
        for extraFileset in self.outputFilesetsForJob(outputMap, mergedFlag,
                                                      outputModule):
            self.filesetAssoc.append({"lfn": wmbsEntry["lfn"],
                                      "fileset": extraFileset})

    # The job is only queued for saving once every file went through okay.
    self._mapLocation(doneJob['fwjr'])
    self.listOfJobsToSave.append(doneJob)
    return
def verifyJobSuccess(self, jobID):
    """
    _verifyJobSuccess_

    Load the job with the given id and check its metadata: the state must
    be "success", the retry count 0 and the outcome "success".  The first
    mismatch raises an AssertionError naming the offending value.
    """
    loadedJob = Job(id=jobID)
    loadedJob.load()

    # (field, expected value, failure message template), checked in order.
    expectations = (
        ("state", "success", "Error: test job in wrong state: %s"),
        ("retry_count", 0, "Error: test job has wrong retry count: %s"),
        ("outcome", "success", "Error: test job has wrong outcome: %s"),
    )
    for field, expected, template in expectations:
        assert loadedJob[field] == expected, template % loadedJob[field]

    return
def testG_ProcessingAlgo(self):
    """
    _ProcessingAlgo_

    Test for the ProcessingAlgo prototype.

    A first job group is driven into 'jobcooloff' twice.  After each pass a
    RetryManagerPoller is run and all jobs are expected to be back in
    'Created'.  Before the second poller run the retry count is expected to
    be 1 and error code 8020 is listed in OneMoreErrorCodes; afterwards the
    retry count is expected to be 5.  A second job group then goes through
    one cycle with MaxRunTime set to 1 and is also expected to end up with
    a retry count of 5.
    """
    # Each pair holds the second and third positional argument handed to
    # changer.propagate() below (presumably new state, previous state).
    failureCycle = (('executing', 'created'),
                    ('complete', 'executing'),
                    ('jobfailed', 'complete'),
                    ('jobcooloff', 'jobfailed'))
    fullCycle = (('created', 'new'),) + failureCycle

    firstGroup = self.createTestJobGroup(nJobs=self.nJobs)

    config = self.getConfig()
    config.RetryManager.plugins = {'Processing': 'ProcessingAlgo'}
    config.RetryManager.section_("ProcessingAlgo")
    config.RetryManager.ProcessingAlgo.section_("default")
    config.RetryManager.ProcessingAlgo.default.coolOffTime = {
        'create': 10,
        'submit': 10,
        'job': 10
    }
    stateChanger = ChangeState(config)

    reportPath = os.path.join(WMCore.WMBase.getTestBase(),
                              "WMComponent_t/JobAccountant_t",
                              "fwjrs/badBackfillJobReport.pkl")
    failedReport = Report()
    failedReport.load(reportPath)

    for groupJob in firstGroup.jobs:
        groupJob['fwjr'] = failedReport
        groupJob['retry_count'] = 0
        failedReport.save(
            os.path.join(groupJob['cache_dir'],
                         "Report.%i.pkl" % groupJob['retry_count']))

    for newState, oldState in fullCycle:
        stateChanger.propagate(firstGroup.jobs, newState, oldState)

    firstPoller = RetryManagerPoller(config)
    firstPoller.algorithm()

    createdIDs = self.getJobs.execute(state='Created')
    self.assertEqual(len(createdIDs), self.nJobs)

    # Second failure pass for the first group.
    for newState, oldState in failureCycle:
        stateChanger.propagate(firstGroup.jobs, newState, oldState)

    for groupJob in firstGroup.jobs:
        reloaded = Job(id=groupJob['id'])
        reloaded.load()
        self.assertEqual(reloaded['retry_count'], 1)
        failedReport.save(
            os.path.join(reloaded['cache_dir'],
                         "Report.%i.pkl" % reloaded['retry_count']))

    config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = [8020]
    secondPoller = RetryManagerPoller(config)
    secondPoller.algorithm()

    createdIDs = self.getJobs.execute(state='Created')
    self.assertEqual(len(createdIDs), self.nJobs)

    for groupJob in firstGroup.jobs:
        reloaded = Job(id=groupJob['id'])
        reloaded.load()
        self.assertEqual(reloaded['retry_count'], 5)

    # Now test timeout
    secondGroup = self.createTestJobGroup(nJobs=self.nJobs)

    # Cycle jobs
    for groupJob in secondGroup.jobs:
        groupJob['fwjr'] = failedReport
        groupJob['retry_count'] = 0
        failedReport.save(
            os.path.join(groupJob['cache_dir'],
                         "Report.%i.pkl" % groupJob['retry_count']))

    for newState, oldState in fullCycle:
        stateChanger.propagate(secondGroup.jobs, newState, oldState)

    for groupJob in secondGroup.jobs:
        reloaded = Job(id=groupJob['id'])
        reloaded.load()
        self.assertEqual(reloaded['retry_count'], 0)

    config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = []
    config.RetryManager.ProcessingAlgo.default.MaxRunTime = 1
    thirdPoller = RetryManagerPoller(config)
    thirdPoller.algorithm()

    createdIDs = self.getJobs.execute(state='Created')
    self.assertEqual(len(createdIDs), self.nJobs * 2)

    for groupJob in secondGroup.jobs:
        reloaded = Job(id=groupJob['id'])
        reloaded.load()
        self.assertEqual(reloaded['retry_count'], 5)

    return
def updateState(self, payload, status):
    """
    Update the WMBS state of the job referenced by payload.

    payload is searched for a "BossJob_<taskId>_<jobId>/" marker.  The
    matching task is loaded through self.blDBsession and the state to apply
    is derived from the return codes of its first job: 'success' or
    'jobfailed'.  If the corresponding WMBS job exists, its state is
    changed, self.queries is executed for it, its outcome is set and it is
    saved.

    payload -- string containing the BossJob marker.
    status  -- requested status.  It only appears in the log message that
               is written when payload cannot be parsed; the state that is
               actually applied always comes from the return codes.

    Returns None in every case.
    """
    import re

    # Adjacent string literals are used for long messages below: a
    # backslash continuation inside a literal would pull the source
    # indentation into the logged text.
    self.log.info("CrabJobCreatorWorker initialized with payload %s"
                  % payload)

    # Parse payload to obtain taskId and jobId.  The pattern is a raw
    # string so that "\d" reaches the regex engine unchanged.
    match = re.search(r"BossJob_(\d+)_(\d+)/", payload)
    if not match:
        self.log.info("CrabJobCreatorWorker failed to parse %s "
                      "and update job status to %s" % (payload, status))
        return
    taskId, jobId = match.groups()

    self.log.info("--->>> taskId = " + str(taskId))
    self.log.info("--->>> jobId = " + str(jobId))

    task = self.blDBsession.load(taskId, jobId)
    blJob = task.jobs[0]
    wmbsJobId = blJob["wmbsJobId"]

    self.log.info("--->>> wmbs job id %s" % wmbsJobId)

    wrapperReturnCode = str(blJob.runningJob['wrapperReturnCode'])
    applicationReturnCode = str(blJob.runningJob['applicationReturnCode'])

    self.log.info("--->>> wrapperReturnCode = " + str(wrapperReturnCode))
    self.log.info("--->>> applicationReturnCode = "
                  + str(applicationReturnCode))

    # NOTE(review): the original comment here said that jobs with
    # wrapperReturnCode=0 *and* applicationReturnCode=0 count as success,
    # but the condition uses "or", so a single zero return code is enough.
    # The condition is left unchanged -- confirm which one is intended.
    if int(wrapperReturnCode) == 0 or int(applicationReturnCode) == 0:
        status = 'success'
    else:
        status = 'jobfailed'

    if not wmbsJobId:
        self.log.info("--->>> jobId %s doesn't have wmbsJobId %s"
                      % (str(jobId), wmbsJobId))
        return

    # Apply the state change to the WMBS job.
    jobObj = Job(id=wmbsJobId)

    # Kept as an explicit comparison; the return type of exists() is not
    # visible from here.
    if jobObj.exists() == False:
        self.log.info("--->>> wmbs job id %s doesn't exists" % wmbsJobId)
    else:
        jobObj.load()
        jobObj.changeState(status)
        self.queries.execute(jobs=[jobObj])
        jobObj["outcome"] = status
        jobObj.save()
        # Logged only when the job was actually updated.
        self.log.info("CrabJobCreatorWorker update state to %s of wmbsJob "
                      "%s bl_job %s task %s"
                      % (status, wmbsJobId, jobId, taskId))

    self.log.info("CrabJobCreatorWorker finished")
    return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=[ "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl" ], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. 
""" myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") 
self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] 
= "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual( list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = 
self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. 
""" testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") 
skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, seName=self.siteDB.cmsNametoSE(site)[0]) self.ses.append(self.siteDB.cmsNametoSE(site)[0]) def createWMSpec(self, name='ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites=[], nonCustodialSites=[], autoApproveSites=[], priority="Low", custodialSubType="Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, 
wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config=config) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. 
""" resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") 
self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output 
fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") 
resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: 
Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = 
self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [ '123454321' ] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [ '123,123' ] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. 
When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ block = self.dataset + "#1" wmbs, sub, numFiles = self.createWMBSHelperWithTopTask( self.wmspec, block, parentFlag=False, detail=True) # file creation without parents self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # no parent per child self.assertEqual(len(child["parents"]), 0) wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # one parent per child self.assertEqual(len(child["parents"]), 1) def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(self.ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, 
mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class WMBSHelperTest(EmulatedUnitTestCase): def setUp(self): """ _setUp_ """ super(WMBSHelperTest, self).setUp() self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection(destroyAllDatabase=True) self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault = False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = DBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = threading.currentThread().logger, dbinterface = threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() super(WMBSHelperTest, self).tearDown() return def setupForKillTest(self, baAPI = None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. 
""" myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) dummyLocationAction = daoFactory(classname = "Locations.New") changeStateAction = daoFactory(classname = "Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname = "Users.New") userAction.execute(dn = userDN, group_name = 'DEFAULT', role_name = 'DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations = "goodse.cern.ch") inputFileB = File("lfnB", locations = "goodse.cern.ch") inputFileC = File("lfnC", locations = "goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations = "goodse.cern.ch") unmergedFileB = File("ulfnB", locations = "goodse.cern.ch") unmergedFileC = File("ulfnC", locations = "goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset = 
inputFileset, workflow = mainProcWorkflow, type = "Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription = self.mainProcSub) procJobGroup.create() self.procJobA = Job(name = "ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name = "ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name = "ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset = unmergedOutputFileset, workflow = mainProcMergeWorkflow, type = "Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription = self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name = "MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name = "MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name = "MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset, workflow = mainCleanupWorkflow, type = "Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name = "CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name = "CleanupJobB") self.cleanupJobB["state"] = "executing" 
self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name = "CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs = jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec = "spec2", owner = "Steve", name = "Bogus", task = "Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset = bogusFileset, workflow = bogusWorkflow, type = "Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. 
""" failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = 
self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. 
""" self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. 
""" testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job = 1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1, max_merge_size = 2, max_merge_events = 3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job = 50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") dummyCleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = 
mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1, include_parents = True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = SiteDBJSON() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname = "Locations.New") self.pnns = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName = site, pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]) self.pnns.append(self.siteDB.cmsNametoPhEDExNode(site)[0]) def createWMSpec(self, name = 'ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites = [], nonCustodialSites = [], autoApproveSites = [], priority = "Low", custodialSubType = "Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): mcArgs['CouchDBName'] = 
rerecoArgs["CouchDBName"] mcArgs["ConfigCacheID"] = createConfig(mcArgs["CouchDBName"]) wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = DBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask = None, parentFlag = False, detail = False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath = self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block = block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config = config) # Create nine jobs self.setupForKillTest(baAPI = baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. 
""" resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', pnn = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath = self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong 
spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged 
output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', 
plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', pnn = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath = self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath = self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS(task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") 
self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def 
testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunBlacklist([181183]) # Set run blacklist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunWhitelist([181183]) # Set run whitelist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['181367'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['57,80'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['123454321'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['123,123'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#" + BLOCK1 wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. 
dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) # Not clear what's supposed to happen here, 2nd test is completely redundant dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. # Not clear what's supposed to happen here, 2nd test is completely redundant numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. 
When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ # Swap out the dataset for one that has parents task = next(self.wmspec.taskIterator()) oldDS = task.inputDataset() # Copy the old dataset, only will use DBS URL from it task.addInputDataset(dbsurl=oldDS.dbsurl, primary='Cosmics', processed='ComissioningHI-PromptReco-v1', tier='RECO') block = '/Cosmics/ComissioningHI-PromptReco-v1/RECO' + '#5b89ba9c-0dbf-11e1-9b6c-003048caaace' # File creation without parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=False, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 0) # no parents per child # File creation with parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 1) # one parent per child def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" # This test is failing because the name of the couch DB is set to None # in TestMonteCarloWorkloadFactory.getMCArgs() but changing it to # "reqmgr_config_cache_t" from StdBase test arguments does not fix the # situation. 
testDuplicateSubscription probably has the same issue self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) firstFile = list(fileset.files)[0] self.assertEqual(firstFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(firstFile['merged'], False) # merged files get added to dbs self.assertEqual(len(firstFile['parents']), 0) #firstFile.loadData() self.assertEqual(sorted(firstFile['locations']), sorted(self.pnns)) self.assertEqual(len(firstFile.getParentLFNs()), 0) self.assertEqual(len(firstFile.getRuns()), 1) run = firstFile.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def testG_ProcessingAlgo(self):
    """
    _ProcessingAlgo_

    Test for the ProcessingAlgo retry plugin.

    Job groups are walked into the 'jobcooloff' state with the report loaded
    from badBackfillJobReport.pkl attached, and RetryManagerPoller is run
    over them.  The assertions check that:

    * after the first polling pass self.nJobs jobs are in 'Created', and
      after a second walk to cooloff each job has a retry_count of 1;
    * with OneMoreErrorCodes set to [8020], the next pass leaves each job
      with a retry_count of 5;
    * for a second, fresh group, with OneMoreErrorCodes emptied and
      MaxRunTime set to 1, one pass leaves each job with a retry_count of 5
      and twice self.nJobs jobs in 'Created'.
    """
    # (newState, oldState) pairs in the order they are given to propagate()
    toCooloff = (('created', 'new'),
                 ('executing', 'created'),
                 ('complete', 'executing'),
                 ('jobfailed', 'complete'),
                 ('jobcooloff', 'jobfailed'))

    firstGroup = self.createTestJobGroup(nJobs=self.nJobs)

    config = self.getConfig()
    config.RetryManager.plugins = {'Processing': 'ProcessingAlgo'}
    config.RetryManager.section_("ProcessingAlgo")
    config.RetryManager.ProcessingAlgo.section_("default")
    config.RetryManager.ProcessingAlgo.default.coolOffTime = {'create': 10,
                                                              'submit': 10,
                                                              'job': 10}
    changer = ChangeState(config)

    reportPath = os.path.join(WMCore.WMBase.getTestBase(),
                              "WMComponent_t/JobAccountant_t",
                              "fwjrs/badBackfillJobReport.pkl")
    report = Report()
    report.load(reportPath)

    def walk(jobs, transitions):
        """Push jobs through each (newState, oldState) transition in order."""
        for newState, oldState in transitions:
            changer.propagate(jobs, newState, oldState)

    def stampReport(jobs):
        """Attach the report to each job, zero its retry count, save the report."""
        for job in jobs:
            job['fwjr'] = report
            job['retry_count'] = 0
            report.save(os.path.join(job['cache_dir'],
                                     "Report.%i.pkl" % job['retry_count']))

    def fromDatabase(job):
        """Return a freshly loaded Job with the same id."""
        stored = Job(id=job['id'])
        stored.load()
        return stored

    def createdCount():
        """Number of job ids currently reported in the 'Created' state."""
        return len(self.getJobs.execute(state='Created'))

    # First group, first failure
    stampReport(firstGroup.jobs)
    walk(firstGroup.jobs, toCooloff)

    firstPoller = RetryManagerPoller(config)
    firstPoller.algorithm()
    self.assertEqual(createdCount(), self.nJobs)

    # First group, second failure (jobs already exist, so skip 'new' -> 'created')
    walk(firstGroup.jobs, toCooloff[1:])
    for job in firstGroup.jobs:
        stored = fromDatabase(job)
        self.assertEqual(stored['retry_count'], 1)
        report.save(os.path.join(stored['cache_dir'],
                                 "Report.%i.pkl" % stored['retry_count']))

    config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = [8020]
    secondPoller = RetryManagerPoller(config)
    secondPoller.algorithm()
    self.assertEqual(createdCount(), self.nJobs)

    for job in firstGroup.jobs:
        self.assertEqual(fromDatabase(job)['retry_count'], 5)

    # Now test timeout
    secondGroup = self.createTestJobGroup(nJobs=self.nJobs)

    # Cycle jobs
    stampReport(secondGroup.jobs)
    walk(secondGroup.jobs, toCooloff)

    for job in secondGroup.jobs:
        self.assertEqual(fromDatabase(job)['retry_count'], 0)

    config.RetryManager.ProcessingAlgo.default.OneMoreErrorCodes = []
    config.RetryManager.ProcessingAlgo.default.MaxRunTime = 1
    thirdPoller = RetryManagerPoller(config)
    thirdPoller.algorithm()
    self.assertEqual(createdCount(), self.nJobs * 2)

    for job in secondGroup.jobs:
        self.assertEqual(fromDatabase(job)['retry_count'], 5)

    return
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Handle a successful job, parsing the job report and updating the job in
    WMBS.

    The job is loaded by id, marked with outcome "success" and given the
    framework job report.  Every file returned by the report's getAllFiles()
    is added through addFileToWMBS and queued in self.filesetAssoc, first for
    the job's own output fileset and then for each fileset returned by
    outputFilesetsForJob.  Merged files are also collected in
    self.mergedOutputFiles.  Skipped files reported by the job are stored in
    self.jobsWithSkippedFiles.

    For LogCollect jobs each file is first associated with its parent jobs;
    if that raises, the error is logged, file processing stops, and the job
    is NOT queued in self.listOfJobsToSave.

    fwkJobReportPath is accepted but not used by this method.
    """
    job = Job(id=jobID)
    job.load()
    job["outcome"] = "success"
    job.getMask()
    jobOutputFileset = job.loadOutputID()
    job["fwjr"] = fwkJobReport

    def runAction(action):
        """Execute a DAO action for this job on the current connection/transaction."""
        return action.execute(jobID=jobID,
                              conn=self.getDBConn(),
                              transaction=self.existingTransaction())

    outputMap = runAction(self.getOutputMapAction)
    jobType = runAction(self.getJobTypeAction)

    reportFiles = fwkJobReport.getAllFiles()

    logCollectLinked = True
    for reportFile in reportFiles:
        # associate logArchived file for parent jobs on wmstats assuming
        # the file list has length 1.
        if jobType == "LogCollect":
            try:
                self.associateLogCollectToParentJobsInWMStats(fwkJobReport,
                                                              reportFile["lfn"],
                                                              fwkJobReport.getTaskName())
            except Exception as ex:
                logCollectLinked = False
                logging.error("Error occurred: associating log collect location, will try again\n %s" % str(ex))
                break

        storedFile = self.addFileToWMBS(jobType, reportFile, job["mask"],
                                        jobID=jobID,
                                        task=fwkJobReport.getTaskName())
        isMerged = reportFile['merged']
        label = reportFile["module_label"]

        if isMerged:
            self.mergedOutputFiles.append(storedFile)

        # The job's own output fileset always comes first, then the mapped ones
        self.filesetAssoc.append({"lfn": storedFile["lfn"], "fileset": jobOutputFileset})
        for fileset in self.outputFilesetsForJob(outputMap, isMerged, label):
            self.filesetAssoc.append({"lfn": storedFile["lfn"], "fileset": fileset})

    # Check if the job had any skipped files
    # Put them in ACDC containers, we assume full file processing
    # No job masks
    skipped = fwkJobReport.getAllSkippedFiles()
    if skipped:
        self.jobsWithSkippedFiles[jobID] = skipped

    if not logCollectLinked:
        return

    # Only save once job is done, and we're sure we made it through okay
    self._mapLocation(job['fwjr'])
    self.listOfJobsToSave.append(job)
    return
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Handle a successful job, parsing the job report and updating the job in
    WMBS.

    The job is loaded by id, marked with outcome "success" and given the
    framework job report.  Every file returned by the report's getAllFiles()
    is added through addFileToWMBS and queued in self.filesetAssoc, first for
    the job's own output fileset and then for each fileset returned by
    outputFilesetsForJob.  Merged files are also collected in
    self.mergedOutputFiles.

    For LogCollect jobs each file is first associated with its parent jobs;
    if that raises, the error is logged and file processing stops.

    fwkJobReportPath is accepted but not used by this method.
    """
    wmbsJob = Job(id=jobID)
    wmbsJob.load()
    wmbsJob["outcome"] = "success"
    wmbsJob.getMask()
    outputID = wmbsJob.loadOutputID()
    wmbsJob["fwjr"] = fwkJobReport

    outputMap = self.getOutputMapAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    jobType = self.getJobTypeAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    fileList = fwkJobReport.getAllFiles()

    # NOTE(review): this flag is cleared when the LogCollect association
    # fails, but nothing in this body reads it afterwards - confirm whether
    # a final "save only on success" step is expected here.
    bookKeepingSuccess = True

    for fwjrFile in fileList:
        # associate logArchived file for parent jobs on wmstats assuming
        # fileList has length 1.
        if jobType == "LogCollect":
            try:
                self.associateLogCollectToParentJobsInWMStats(
                    fwkJobReport, fwjrFile["lfn"],
                    fwkJobReport.getTaskName())
            except Exception as ex:
                # "except Exception, ex" is Python-2-only syntax; the "as"
                # form is accepted by Python 2.6+ and Python 3.
                bookKeepingSuccess = False
                logging.error(
                    "Error occurred: associating log collect location, will try again\n %s",
                    str(ex))
                break

        wmbsFile = self.addFileToWMBS(jobType,
                                      fwjrFile,
                                      wmbsJob["mask"],
                                      jobID=jobID,
                                      task=fwkJobReport.getTaskName())
        merged = fwjrFile['merged']
        moduleLabel = fwjrFile["module_label"]

        if merged:
            self.mergedOutputFiles.append(wmbsFile)

        self.filesetAssoc.append({
            "lfn": wmbsFile["lfn"],
            "fileset": outputID
        })
        outputFilesets = self.outputFilesetsForJob(outputMap, merged,
                                                   moduleLabel)
        for outputFileset in outputFilesets:
            self.filesetAssoc.append({
                "lfn": wmbsFile["lfn"],
                "fileset": outputFileset
            })
def handleJob(self, jobID, fwkJobReport):
    """
    _handleJob_

    Figure out if a job was successful or not, handle it appropriately
    (parse FWJR, update WMBS) and return the success status as a boolean.

    Steps, in order:

    1. Start from fwkJobReport.taskSuccessful().  For a successful report the
       output modules named by its files are compared with the keys of the
       job's output map; a mismatch outside the accepted special cases turns
       the job into a failure.  Failed jobs only keep the files of the
       'logArch1' step.
    2. If there are files to register, isTaskExistInFWJR is called with
       "success" or "failed".
    3. For successful LogCollect jobs each file is associated with its parent
       jobs.  If that raises, the error is logged and the job is left
       untouched (nothing is registered or queued).
    4. Otherwise the job is loaded, its outcome and report are set, its files
       are added through addFileToWMBS and queued in self.filesetAssoc, any
       skipped files are recorded (successful jobs other than LogCollect and
       Cleanup only), and the job is queued in self.listOfJobsToSave or
       self.listOfJobsToFail.

    :param jobID: id of the job, used for the DAO queries and log messages
    :param fwkJobReport: framework job report object for the job
    :return: the final success status; note it is still the computed status
             when the LogCollect association in step 3 failed
    """
    jobSuccess = fwkJobReport.taskSuccessful()

    outputMap = self.getOutputMapAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    jobType = self.getJobTypeAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    if jobSuccess:
        fileList = fwkJobReport.getAllFiles()

        # consistency check comparing outputMap to fileList
        # they should match except for some limited special cases
        expectedModules = set(outputMap.keys())
        outputModules = {fwjrFile['outputModule'] for fwjrFile in fileList}
        mergeModules = {'Merged', 'MergedError', 'logArchive'}

        modulesAccepted = (
            expectedModules == outputModules
            # LogCollect: empty output map, a single 'LogCollect' module
            or (jobType == "LogCollect"
                and not expectedModules
                and outputModules == {'LogCollect'})
            # Merge: either 'Merged' or 'MergedError' next to 'logArchive'
            or (jobType == "Merge"
                and expectedModules == mergeModules
                and outputModules in ({'Merged', 'logArchive'},
                                      {'MergedError', 'logArchive'}))
            # Express: only 'write_RAW' may be missing from the report
            or (jobType == "Express"
                and expectedModules.difference(outputModules) == {'write_RAW'})
        )

        if not modulesAccepted:
            # A mismatch is still tolerated for Processing/Production jobs
            # that ran more than one cmsRun step.
            multiStep = False
            if jobType in ("Processing", "Production"):
                cmsRunSteps = sum(1 for step in fwkJobReport.listSteps()
                                  if step.startswith("cmsRun"))
                multiStep = cmsRunSteps > 1

            if multiStep:
                logging.debug(
                    "Job %d , list of expected outputModules does not match job report, accepted for multi-step CMSSW job",
                    jobID)
            else:
                jobSuccess = False
                logging.error(
                    "Job %d , list of expected outputModules does not match job report, failing job",
                    jobID)
                logging.debug("Job %d , expected outputModules %s", jobID,
                              sorted(outputMap.keys()))
                logging.debug("Job %d , fwjr outputModules %s", jobID,
                              sorted(outputModules))
                fileList = fwkJobReport.getAllFilesFromStep(step='logArch1')
    else:
        fileList = fwkJobReport.getAllFilesFromStep(step='logArch1')

    if jobSuccess:
        logging.info("Job %d , handle successful job", jobID)
    else:
        logging.warning("Job %d , bad jobReport, failing job", jobID)

    # make sure the task name is present in FWJR (recover from WMBS if needed)
    if fileList:
        self.isTaskExistInFWJR(fwkJobReport,
                               "success" if jobSuccess else "failed")

    # special check for LogCollect jobs
    skipLogCollect = False
    if jobSuccess and jobType == "LogCollect":
        for fwjrFile in fileList:
            try:
                # this assumes there is only one file for LogCollect jobs,
                # not sure what happens if that changes
                self.associateLogCollectToParentJobsInWMStats(
                    fwkJobReport, fwjrFile["lfn"],
                    fwkJobReport.getTaskName())
            except Exception as ex:
                skipLogCollect = True
                logging.error(
                    "Error occurred: associating log collect location, will try again\n %s",
                    str(ex))
                break

    # now handle the job (unless the special LogCollect check failed)
    if not skipLogCollect:

        wmbsJob = Job(id=jobID)
        wmbsJob.load()
        outputID = wmbsJob.loadOutputID()
        wmbsJob.getMask()

        wmbsJob["fwjr"] = fwkJobReport
        wmbsJob["outcome"] = "success" if jobSuccess else "failure"

        for fwjrFile in fileList:

            logging.debug("Job %d , register output %s", jobID,
                          fwjrFile["lfn"])

            wmbsFile = self.addFileToWMBS(jobType,
                                          fwjrFile,
                                          wmbsJob["mask"],
                                          jobID=jobID,
                                          task=fwkJobReport.getTaskName())
            merged = fwjrFile['merged']
            moduleLabel = fwjrFile["module_label"]

            if merged:
                self.mergedOutputFiles.append(wmbsFile)

            self.filesetAssoc.append({
                "lfn": wmbsFile["lfn"],
                "fileset": outputID
            })

            # LogCollect jobs have no output fileset
            if jobType == "LogCollect":
                continue
            # Repack jobs that wrote too large merged output skip output filesets
            if (jobType == "Repack" and merged
                    and wmbsFile["size"] > self.maxAllowedRepackOutputSize):
                continue

            outputFilesets = self.outputFilesetsForJob(outputMap, merged,
                                                       moduleLabel)
            for outputFileset in outputFilesets:
                self.filesetAssoc.append({
                    "lfn": wmbsFile["lfn"],
                    "fileset": outputFileset
                })

        # Check if the job had any skipped files, put them in ACDC containers
        # We assume full file processing (no job masks)
        if jobSuccess:
            skippedFiles = fwkJobReport.getAllSkippedFiles()
            if skippedFiles and jobType not in ['LogCollect', 'Cleanup']:
                self.jobsWithSkippedFiles[jobID] = skippedFiles

        # Only save once job is done, and we're sure we made it through okay
        self._mapLocation(wmbsJob['fwjr'])
        if jobSuccess:
            self.listOfJobsToSave.append(wmbsJob)
        else:
            self.listOfJobsToFail.append(wmbsJob)

    return jobSuccess
class WMBSHelperTest(unittest.TestCase):
    """
    _WMBSHelperTest_

    Tests for WMBSHelper (subscription creation and file injection) and for
    the workflow killing code.  Every test runs against the database and
    CouchDB instances prepared in setUp() and wiped again in tearDown().
    """

    def setUp(self):
        """
        _setUp_

        Set up logging, the database connection, the two CouchDB databases
        and the WMBS / DBSBuffer / BossAir / ResourceControl schema.  Then
        build the default ReReco spec and cache its top level task, input
        dataset, a mock DBS reader and a WMBS DAO factory on the instance.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
        self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
        os.environ["COUCHDB"] = "wmbshelper_t"
        self.testInit.setSchema(customModules = ["WMCore.WMBS",
                                                 "WMComponent.DBSBuffer.Database",
                                                 "WMCore.BossAir",
                                                 "WMCore.ResourceControl"],
                                useDefault = False)

        self.workDir = self.testInit.generateWorkDir()

        self.wmspec = self.createWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = MockDBSReader(self.inputDataset.dbsurl)
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = threading.currentThread().logger,
                                     dbinterface = threading.currentThread().dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database, the CouchDB databases and the work directory.
        """
        self.testInit.clearDatabase()
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        return

    def setupForKillTest(self, baAPI = None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task
        and a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create
        jobs for each subscription in various states.

        If baAPI is given, the nine jobs are also registered with it.  An
        unrelated "Bogus" workflow is created as well so that the tests can
        check that it is left alone.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        changeStateAction = daoFactory(classname = "Jobs.ChangeState")

        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing',
                                        maxSlots = 10000)

        # Input fileset for the processing subscription.
        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations = "goodse.cern.ch")
        inputFileB = File("lfnB", locations = "goodse.cern.ch")
        inputFileC = File("lfnC", locations = "goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        # Unmerged output fileset, shared by the merge and cleanup subscriptions.
        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations = "goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations = "goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations = "goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve",
                                    name = "Main", task = "Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve",
                                         name = "Main", task = "ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve",
                                       name = "Main", task = "Cleanup")
        mainCleanupWorkflow.create()

        # Processing subscription: A acquired, B completed, C left untouched.
        self.mainProcSub = Subscription(fileset = inputFileset,
                                        workflow = mainProcWorkflow,
                                        type = "Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription = self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name = "ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name = "ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name = "ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        # Merge subscription: A acquired, B failed, C left untouched.
        self.mainMergeSub = Subscription(fileset = unmergedOutputFileset,
                                         workflow = mainProcMergeWorkflow,
                                         type = "Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription = self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name = "MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name = "MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name = "MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        # Cleanup subscription: A acquired, B completed, C left untouched.
        self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset,
                                           workflow = mainCleanupWorkflow,
                                           type = "Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name = "CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name = "CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name = "CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [self.procJobA, self.procJobB, self.procJobC,
                   self.mergeJobA, self.mergeJobB, self.mergeJobC,
                   self.cleanupJobA, self.cleanupJobB, self.cleanupJobC]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = 'Steve'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs = jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec = "spec2", owner = "Steve",
                                 name = "Bogus", task = "Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset = bogusFileset,
                                     workflow = bogusWorkflow,
                                     type = "Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return

    def verifyFileKillStatus(self):
        """
        _verifyFileKillStatus_

        Verify that all files were killed correctly.  The status of files in
        Cleanup and LogCollect subscriptions isn't modified.  Status of
        already completed and failed files is not modified.  Also verify that
        the bogus subscription is untouched.
        """
        # Processing subscription: all three input files end up completed.
        failedFiles = self.mainProcSub.filesOfStatus("Failed")
        acquiredFiles = self.mainProcSub.filesOfStatus("Acquired")
        completedFiles = self.mainProcSub.filesOfStatus("Completed")
        availableFiles = self.mainProcSub.filesOfStatus("Available")
        bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired")

        self.assertEqual(len(availableFiles), 0,
                         "Error: There should be no available files.")
        self.assertEqual(len(acquiredFiles), 0,
                         "Error: There should be no acquired files.")
        self.assertEqual(len(bogusAcquiredFiles), 1,
                         "Error: There should be one acquired file.")

        self.assertEqual(len(completedFiles), 3,
                         "Error: There should be three completed files.")
        goldenLFNs = ["lfnA", "lfnB", "lfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs,
                            "Error: Extra completed file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(failedFiles), 0,
                         "Error: There should be no failed files.")
        self.assertEqual(len(goldenLFNs), 0,
                         "Error: Missing LFN")

        # Merge subscription: the failed file stays failed, the rest complete.
        failedFiles = self.mainMergeSub.filesOfStatus("Failed")
        acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired")
        completedFiles = self.mainMergeSub.filesOfStatus("Completed")
        availableFiles = self.mainMergeSub.filesOfStatus("Available")

        self.assertEqual(len(acquiredFiles), 0,
                         "Error: Merge subscription should have 0 acq files.")
        self.assertEqual(len(availableFiles), 0,
                         "Error: Merge subscription should have 0 avail files.")

        self.assertEqual(len(failedFiles), 1,
                         "Error: Merge subscription should have 1 failed files.")
        self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB",
                         "Error: Wrong failed file.")

        self.assertEqual(len(completedFiles), 2,
                         "Error: Merge subscription should have 2 compl files.")
        goldenLFNs = ["ulfnA", "ulfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs,
                            "Error: Extra complete file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(goldenLFNs), 0,
                         "Error: Missing LFN")

        # Cleanup subscription: nothing changes compared to setupForKillTest().
        failedFiles = self.mainCleanupSub.filesOfStatus("Failed")
        acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired")
        completedFiles = self.mainCleanupSub.filesOfStatus("Completed")
        availableFiles = self.mainCleanupSub.filesOfStatus("Available")

        self.assertEqual(len(failedFiles), 0,
                         "Error: Cleanup subscription should have 0 fai files.")

        self.assertEqual(len(acquiredFiles), 1,
                         "Error: There should be only one acquired file.")
        self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA",
                         "Error: Wrong acquired LFN.")

        self.assertEqual(len(completedFiles), 1,
                         "Error: There should be only one completed file.")
        self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB",
                         "Error: Wrong completed LFN.")

        self.assertEqual(len(availableFiles), 1,
                         "Error: There should be only one available file.")
        self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC",
                         "Error: Wrong available LFN.")

        return

    def verifyJobKillStatus(self):
        """
        _verifyJobKillStatus_

        Verify that jobs are killed correctly.  Jobs belonging to Cleanup and
        LogCollect subscriptions are not killed.  The status of jobs that have
        already finished running is not changed.
        """
        self.procJobA.load()
        self.procJobB.load()
        self.procJobC.load()

        self.assertEqual(self.procJobA["state"], "killed",
                         "Error: Proc job A should be killed.")
        self.assertEqual(self.procJobB["state"], "killed",
                         "Error: Proc job B should be killed.")
        self.assertEqual(self.procJobC["state"], "complete",
                         "Error: Proc job C should be complete.")

        self.mergeJobA.load()
        self.mergeJobB.load()
        self.mergeJobC.load()

        self.assertEqual(self.mergeJobA["state"], "exhausted",
                         "Error: Merge job A should be exhausted.")
        self.assertEqual(self.mergeJobB["state"], "cleanout",
                         "Error: Merge job B should be cleanout.")
        self.assertEqual(self.mergeJobC["state"], "killed",
                         "Error: Merge job C should be killed.")

        self.cleanupJobA.load()
        self.cleanupJobB.load()
        self.cleanupJobC.load()

        self.assertEqual(self.cleanupJobA["state"], "new",
                         "Error: Cleanup job A should be new.")
        self.assertEqual(self.cleanupJobB["state"], "executing",
                         "Error: Cleanup job B should be executing.")
        self.assertEqual(self.cleanupJobC["state"], "complete",
                         "Error: Cleanup job C should be complete.")
        return

    def testKillWorkflow(self):
        """
        _testKillWorkflow_

        Verify that workflow killing works correctly.
        """
        configFile = EmulatorSetup.setupWMAgentConfig()
        try:
            config = loadConfigurationFile(configFile)

            baAPI = BossAirAPI(config = config)

            # Create nine jobs
            self.setupForKillTest(baAPI = baAPI)
            self.assertEqual(len(baAPI._listRunJobs()), 9)
            killWorkflow("Main", config, config)

            self.verifyFileKillStatus()
            self.verifyJobKillStatus()
            self.assertEqual(len(baAPI._listRunJobs()), 8)
        finally:
            # Remove the generated config even when an assertion above fails,
            # otherwise it is left behind for later runs.
            EmulatorSetup.deleteConfig(configFile)
        return

    def createTestWMSpec(self):
        """
        _createTestWMSpec_

        Create a WMSpec that has a processing, merge, cleanup and skims tasks
        that can be used by the subscription creation test.
        """
        testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        testWorkload.setDashboardActivity("TestReReco")
        testWorkload.setSpecUrl("/path/to/workload")
        testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'})

        procTask = testWorkload.newTask("ProcessingTask")
        procTask.setTaskType("Processing")
        procTask.setSplittingAlgorithm("FileBased", files_per_job = 1)
        procTaskCMSSW = procTask.makeStep("cmsRun1")
        procTaskCMSSW.setStepType("CMSSW")
        procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper()
        procTask.setTaskType("Processing")
        procTask.setSiteWhitelist(["site1"])
        procTask.setSiteBlacklist(["site2"])
        procTask.applyTemplates()

        procTaskCMSSWHelper.addOutputModule("OutputA",
                                            primaryDataset = "bogusPrimary",
                                            processedDataset = "bogusProcessed",
                                            dataTier = "DataTierA",
                                            lfnBase = "bogusUnmerged",
                                            mergedLFNBase = "bogusMerged",
                                            filterName = None)

        mergeTask = procTask.addTask("MergeTask")
        mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA")
        mergeTask.setTaskType("Merge")
        mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1,
                                        max_merge_size = 2, max_merge_events = 3)
        mergeTaskCMSSW = mergeTask.makeStep("cmsRun1")
        mergeTaskCMSSW.setStepType("CMSSW")
        mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper()
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()
        mergeTaskCMSSWHelper.addOutputModule("Merged",
                                             primaryDataset = "bogusPrimary",
                                             processedDataset = "bogusProcessed",
                                             dataTier = "DataTierA",
                                             lfnBase = "bogusUnmerged",
                                             mergedLFNBase = "bogusMerged",
                                             filterName = None)

        cleanupTask = procTask.addTask("CleanupTask")
        cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA")
        cleanupTask.setTaskType("Merge")
        cleanupTask.setSplittingAlgorithm("SiblingProcessingBase", files_per_job = 50)
        cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1")
        cleanupTaskCMSSW.setStepType("CMSSW")
        cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper()
        # The task type is only switched to "Cleanup" after the step has been
        # built; the final type is what the subscription test relies on.
        cleanupTask.setTaskType("Cleanup")
        cleanupTask.applyTemplates()

        skimTask = mergeTask.addTask("SkimTask")
        skimTask.setTaskType("Skim")
        skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged")
        skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1,
                                       include_parents = True)
        skimTaskCMSSW = skimTask.makeStep("cmsRun1")
        skimTaskCMSSW.setStepType("CMSSW")
        skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper()
        skimTask.setTaskType("Skim")
        skimTask.applyTemplates()
        skimTaskCMSSWHelper.addOutputModule("SkimOutputA",
                                            primaryDataset = "bogusPrimary",
                                            processedDataset = "bogusProcessed",
                                            dataTier = "DataTierA",
                                            lfnBase = "bogusUnmerged",
                                            mergedLFNBase = "bogusMerged",
                                            filterName = None)
        skimTaskCMSSWHelper.addOutputModule("SkimOutputB",
                                            primaryDataset = "bogusPrimary",
                                            processedDataset = "bogusProcessed",
                                            dataTier = "DataTierA",
                                            lfnBase = "bogusUnmerged",
                                            mergedLFNBase = "bogusMerged",
                                            filterName = None)
        return testWorkload

    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(procWorkflow.spec,
                         os.path.join(self.workDir, procWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(mergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec,
                         os.path.join(self.workDir, mergeWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec,
                         os.path.join(self.workDir, cleanupWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec,
                         os.path.join(self.workDir, skimWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name,
                         "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return

    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        Create the subscriptions for the test workload, truncate the workload
        at the merge task under the name "ResubmitTestWorkload" and create
        the subscriptions again.  Verify the workflows, output filesets and
        subscriptions of the truncated workload.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")
        testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir)
        testResubmitWMBSHelper.createSubscription()

        mergeWorkflow = Workflow(name = "ResubmitTestWorkload",
                                 task = "/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec,
                         os.path.join(self.workDir, mergeWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "ResubmitTestWorkload",
                                   task = "/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec,
                         os.path.join(self.workDir, cleanupWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "ResubmitTestWorkload",
                                task = "/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec,
                         os.path.join(self.workDir, skimWorkflow.name,
                                      "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name,
                         "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return

    def setupMCWMSpec(self):
        """Setup MC workflow"""
        self.wmspec = self.createMCWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        # No DBS reader is set for the MC spec; a fake SiteDB is used instead.
        self.dbs = None
        self.siteDB = fakeSiteDB()

    def createWMSpec(self, name = 'ReRecoWorkload'):
        """Build a ReReco workload called *name* from rerecoArgs and return it."""
        wmspec = rerecoWorkload(name, rerecoArgs)
        wmspec.setSpecUrl("/path/to/workload")
        return wmspec

    def createMCWMSpec(self, name = 'MonteCarloWorkload'):
        """
        Build a MonteCarlo workload called *name* from mcArgs, add a
        10000 event production to its first task and return it.
        """
        wmspec = monteCarloWorkload(name, mcArgs)
        wmspec.setSpecUrl("/path/to/workload")
        getFirstTask(wmspec).addProduction(totalevents = 10000)
        return wmspec

    def getDBS(self, wmspec):
        """Return a MockDBSReader for the input dataset of *wmspec*'s first task."""
        topLevelTask = getFirstTask(wmspec)
        inputDataset = topLevelTask.inputDataset()
        dbs = MockDBSReader(inputDataset.dbsurl)
        #dbsDict = {self.inputDataset.dbsurl : self.dbs}
        return dbs

    def createWMBSHelperWithTopTask(self, wmspec, block, mask = None):
        """
        Create a WMBSHelper for *wmspec* and run its subscription creation.

        When *block* is set, the block name is resolved through self.dbs and
        the resulting block information is what gets injected; otherwise the
        falsy value is handed to createSubscriptionAndAddFiles() unchanged.
        Returns the WMBSHelper.
        """
        wmbs = WMBSHelper(wmspec, block, mask, cachepath = self.workDir)
        if block:
            block = self.dbs.getFileBlock(block)[block]
        wmbs.createSubscriptionAndAddFiles(block = block)

        return wmbs

#    def testProduction(self):
#        """Production workflow"""
#        pass

    def testReReco(self):
        """ReReco workflow"""
        # create workflow
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        # NOTE(review): unlike the run restriction tests below, the whole
        # getFileBlock() result is passed here instead of [block]['Files'] -
        # confirm this is intended.
        files = wmbs.validFiles(self.dbs.getFileBlock(block))
        self.assertEqual(len(files), 1)

    def testReRecoBlackRunRestriction(self):
        """ReReco workflow with Run restrictions"""
        block = self.dataset + "#2"
        #add run blacklist
        self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)

        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testReRecoWhiteRunRestriction(self):
        """ReReco workflow with a run whitelist"""
        block = self.dataset + "#2"
        # Run Whitelist
        self.topLevelTask.setInputRunWhitelist([2])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock())

    def testDuplicateFileInsert(self):
        """
        Inject the same block for two different specs: the second injection
        must add no new files, yet its own fileset must hold all of them.
        """
        # using default wmspec
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        # check initially inserted files.
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))
        firstFileset = wmbs.topLevelFileset
        wmbsDao = wmbs.daofactory(classname = "Files.InFileset")

        numOfFiles = len(wmbsDao.execute(firstFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        # use the new spec with same inputdataset
        block = self.dataset + "#1"
        wmspec = self.createWMSpec("TestSpec1")
        dbs = self.getDBS(wmspec)
        wmbs = self.createWMBSHelperWithTopTask(wmspec, block)
        # check duplicate insert
        dbsFiles = dbs.getFileBlock(block)[block]['Files']
        numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block])
        self.assertEqual(numOfFiles, 0)
        secondFileset = wmbs.topLevelFileset

        wmbsDao = wmbs.daofactory(classname = "Files.InFileset")
        numOfFiles = len(wmbsDao.execute(secondFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        self.assertNotEqual(firstFileset.id, secondFileset.id)

    def testParentage(self):
        """
        TODO: add the parentage test.
        1. check whether parent files are created in wmbs.
        2. check parent files are associated to child.
        3. When 2 specs with the same input data (one with parent processing,
           one without it) is inserted, if one without parent processing
           inserted first then the other with parent processing insert, it
           still needs to create parent files although child files are
           duplicate
        """
        pass

    def testMCFakeFileInjection(self):
        """Inject fake Monte Carlo files into WMBS"""
        self.setupMCWMSpec()

        mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345,
                    LastEvent = 999995, LastLumi = 12345, LastRun = 12)

        # add sites that would normally be added by operator via resource_control
        locationDAO = self.daoFactory(classname = "Locations.New")
        ses = []
        for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
            locationDAO.execute(siteName = site, seName = self.siteDB.cmsNametoSE(site))
            ses.append(self.siteDB.cmsNametoSE(site))

        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        subscription = wmbs.topLevelSubscription
        self.assertEqual(1, subscription.exists())
        fileset = subscription['fileset']
        self.assertEqual(1, fileset.exists())
        fileset.loadData() # need to refresh from database

        self.assertEqual(len(fileset.files), 1)
        self.assertEqual(len(fileset.parents), 0)
        self.assertFalse(fileset.open)

        # Named fakeFile rather than file so that no builtin is shadowed.
        fakeFile = list(fileset.files)[0]
        self.assertEqual(fakeFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range
        self.assertEqual(fakeFile['merged'], False) # merged files get added to dbs
        self.assertEqual(len(fakeFile['parents']), 0)
        #fakeFile.loadData()
        self.assertEqual(sorted(fakeFile['locations']), sorted(ses))
        self.assertEqual(len(fakeFile.getParentLFNs()), 0)

        self.assertEqual(len(fakeFile.getRuns()), 1)
        run = fakeFile.getRuns()[0]
        self.assertEqual(run.run, mask['FirstRun'])
        self.assertEqual(run.lumis[0], mask['FirstLumi'])
        self.assertEqual(run.lumis[-1], mask['LastLumi'])
        self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def algorithm(self, parameters = None):
    """
    _algorithm_

    Reconcile every job in the 'asopending' state with the transfer states
    found in the ASO monitoring view.

    For each pending job:
      * the framework job report (FWJR) is loaded from job['fwjr_path']; if
        that fails the job is moved to 'complete' right away and nothing
        else is done for it;
      * the 'UserMonitoring/FilesByWorkflow' view is queried (only once per
        workflow within a call) and its rows are indexed by LFN;
      * every FWJR file that has an async_dest but no asyncStatus yet is
        marked 'Success' (its LFN and location are updated too) or 'Failed'
        according to the view, and the FWJR is saved after each change;
      * if no file is still waiting for its transfer, the job is moved to
        'complete', or to 'asofailed' when a transfer was found failed
        during this pass.

    :param parameters: not used by this method.
    """
    filesCache = {}
    currentTime = time.time()
    pendingASOJobs = self.getJobsAction.execute(state = "asopending")

    for job_id in pendingASOJobs:
        workflow = self.getWorkflowNameFromJobId(job_id)
        self.logger.info("Processing %s from %s" % (job_id, workflow))

        job = Job(id = job_id)
        job.load()
        jobReport = Report()
        jobReportPath = job['fwjr_path']
        try:
            jobReportPath = jobReportPath.replace("file://", "")
            jobReport.load(jobReportPath)
        except Exception as ex:
            # if we got here, we must've used to have had a FWJR, knock it back
            # to the JobAccountant, they can deal with it
            self.logger.warning("ASOTracker: %s has no FWJR, but it should if we got here: %s"
                                % (job['id'], str(ex)))
            # FIXME Should find out how to store errors so the outside will see
            self.stateChanger.propagate(job, "complete", "asopending")
            continue

        # retrieve all the files for this workflow, if it exists
        if workflow not in filesCache:
            query = {'startkey' : [workflow, int(self.lastMonitoringTimestamp - 120)],
                     'endkey' : [workflow, int(currentTime) + 1],
                     'reduce' : False}
            monFiles = self.asoMonDB.loadView('UserMonitoring',
                                              'FilesByWorkflow',
                                              query)
            self.logger.info("Got this for files %s using %s" % (monFiles, query))

            oneCache = {}
            for oneFile in monFiles['rows']:
                fileInfo = oneFile['value']
                preserveLfn = fileInfo.get('preserve_lfn', False)
                self.logger.debug("PRESERVE LFN1 %s %s" % (fileInfo['lfn'], preserveLfn))
                # The comparison against False is deliberate: only False (or
                # 0) triggers the rewrite; None or any other value keeps the
                # LFN untouched.
                if preserveLfn == False:
                    lfn = fileInfo['lfn'].replace('store/temp', 'store', 1)
                else:
                    lfn = fileInfo['lfn']
                oneCache[lfn] = {'state' : fileInfo['state'],
                                 'lfn' : lfn,
                                 'location' : self.phedexApi.getNodeSE(fileInfo['location'])}
            filesCache[workflow] = oneCache

        asoFiles = filesCache[workflow]
        allFiles = jobReport.getAllFileRefs()

        # Look through each job state and update it
        filesFailed = False
        asoComplete = True
        for fwjrFile in allFiles:
            preserveLfn = getattr(fwjrFile, 'preserve_lfn', False)
            self.logger.debug("PRESERVE LFN2 %s %s" % (fwjrFile.lfn, preserveLfn))
            # Same deliberate comparison as for the monitoring rows above.
            if preserveLfn == False:
                lfn = fwjrFile.lfn.replace('store/temp', 'store', 1)
            else:
                lfn = fwjrFile.lfn

            # Only files that asked for ASO and have no final status yet
            # need to be looked at.
            if not getattr(fwjrFile, "async_dest", None) or \
               getattr(fwjrFile, "asyncStatus", None):
                continue

            if lfn not in asoFiles:
                # The view does not know about this file yet.
                asoComplete = False
                continue

            transferState = asoFiles[lfn]['state']
            if transferState == 'done':
                fwjrFile.asyncStatus = 'Success'
                fwjrFile.lfn = lfn
                fwjrFile.location = asoFiles[lfn]['location']
                jobReport.save(jobReportPath)
            elif transferState == 'failed':
                # TODO: need to propagate diagnostic info here
                fwjrFile.asyncStatus = 'Failed'
                jobReport.save(jobReportPath)
                filesFailed = True
            else:
                # Transfer still in flight.
                asoComplete = False

        # Obviously need to change this to query the info from ASO
        # if a job failed, send it to asofailed instead
        if asoComplete:
            if not filesFailed:
                self.stateChanger.propagate(job, "complete", "asopending")
            else:
                self.stateChanger.propagate(job, "asofailed", "asopending")
def handleJob(self, jobID, fwkJobReport):
    """
    _handleJob_

    Work out whether the job succeeded, register its output files in the
    WMBS bookkeeping lists accordingly and return the success status as a
    boolean.

    A job reported as successful is still failed when the output modules in
    its report do not match the expected output map, apart from a few
    accepted special cases.  Failed jobs only register the files of the
    'logArch1' step.
    """
    jobSuccess = fwkJobReport.taskSuccessful()

    outputMap = self.getOutputMapAction.execute(jobID=jobID,
                                                conn=self.getDBConn(),
                                                transaction=self.existingTransaction())
    jobType = self.getJobTypeAction.execute(jobID=jobID,
                                            conn=self.getDBConn(),
                                            transaction=self.existingTransaction())

    if not jobSuccess:
        fileList = fwkJobReport.getAllFilesFromStep(step='logArch1')
    else:
        fileList = fwkJobReport.getAllFiles()

        # consistency check comparing outputMap to fileList
        # they should match except for some limited special cases
        reportedModules = set(fwjrFile['outputModule'] for fwjrFile in fileList)
        expectedModules = set(outputMap.keys())
        mergeExpected = set(['Merged', 'MergedError', 'logArchive'])

        consistent = (
            expectedModules == reportedModules
            or (jobType == "LogCollect"
                and len(expectedModules) == 0
                and reportedModules == set(['LogCollect']))
            or (jobType == "Merge"
                and expectedModules == mergeExpected
                and reportedModules == set(['Merged', 'logArchive']))
            or (jobType == "Merge"
                and expectedModules == mergeExpected
                and reportedModules == set(['MergedError', 'logArchive']))
            or (jobType == "Express"
                and expectedModules.difference(reportedModules) == set(['write_RAW']))
        )

        if not consistent:
            # Processing/Production jobs with more than one cmsRun step are
            # let through despite the mismatch
            multiStep = (
                jobType in ["Processing", "Production"]
                and sum(1 for stepName in fwkJobReport.listSteps()
                        if stepName.startswith("cmsRun")) > 1
            )
            if multiStep:
                logging.debug("Job %d , list of expected outputModules does not match job report, accepted for multi-step CMSSW job", jobID)
            else:
                jobSuccess = False
                logging.error("Job %d , list of expected outputModules does not match job report, failing job", jobID)
                logging.debug("Job %d , expected outputModules %s", jobID, sorted(expectedModules))
                logging.debug("Job %d , fwjr outputModules %s", jobID, sorted(reportedModules))
                fileList = fwkJobReport.getAllFilesFromStep(step='logArch1')

    if jobSuccess:
        logging.info("Job %d , handle successful job", jobID)
    else:
        logging.error("Job %d , bad jobReport, failing job", jobID)

    # make sure the task name is present in FWJR (recover from WMBS if needed)
    if len(fileList) > 0:
        self.isTaskExistInFWJR(fwkJobReport, "success" if jobSuccess else "failed")

    # special check for LogCollect jobs
    if jobSuccess and jobType == "LogCollect":
        for fwjrFile in fileList:
            try:
                # this assumes there is only one file for LogCollect jobs,
                # not sure what happens if that changes
                self.associateLogCollectToParentJobsInWMStats(fwkJobReport,
                                                              fwjrFile["lfn"],
                                                              fwkJobReport.getTaskName())
            except Exception as ex:
                logging.error("Error occurred: associating log collect location, will try again\n %s" % str(ex))
                # nothing is registered for this job; it is left untouched
                return jobSuccess

    # now handle the job (the special LogCollect check passed)
    wmbsJob = Job(id=jobID)
    wmbsJob.load()
    outputID = wmbsJob.loadOutputID()
    wmbsJob.getMask()

    wmbsJob["fwjr"] = fwkJobReport
    wmbsJob["outcome"] = "success" if jobSuccess else "failure"

    for fwjrFile in fileList:
        logging.debug("Job %d , register output %s", jobID, fwjrFile["lfn"])

        wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"],
                                      jobID=jobID, task=fwkJobReport.getTaskName())
        merged = fwjrFile['merged']
        moduleLabel = fwjrFile["module_label"]

        if merged:
            self.mergedOutputFiles.append(wmbsFile)

        self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputID})

        # LogCollect jobs have no output fileset
        if jobType == "LogCollect":
            continue
        for outputFileset in self.outputFilesetsForJob(outputMap, merged, moduleLabel):
            self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputFileset})

    # Check if the job had any skipped files, put them in ACDC containers
    # We assume full file processing (no job masks)
    if jobSuccess:
        skippedFiles = fwkJobReport.getAllSkippedFiles()
        if skippedFiles:
            self.jobsWithSkippedFiles[jobID] = skippedFiles

    # Only save once job is done, and we're sure we made it through okay
    self._mapLocation(wmbsJob['fwjr'])
    targetList = self.listOfJobsToSave if jobSuccess else self.listOfJobsToFail
    targetList.append(wmbsJob)

    return jobSuccess
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None):
    """
    _handleSuccessful_

    Handle a successful job, parsing the job report and updating the job in
    WMBS.

    Every file in the report is added to WMBS and associated with the job's
    output fileset and with the filesets returned by outputFilesetsForJob().
    Skipped files are remembered in jobsWithSkippedFiles and the job is
    queued in listOfJobsToSave.

    For LogCollect jobs the log archive is first associated with the parent
    jobs in WMStats.  If that association fails, nothing at all is recorded
    for the job: it is left for a later attempt without partial entries in
    the bookkeeping lists.

    :param jobID: WMBS id of the job.
    :param fwkJobReport: framework job report of the job.
    :param fwkJobReportPath: not used by this method.
    """
    wmbsJob = Job(id=jobID)
    wmbsJob.load()
    wmbsJob["outcome"] = "success"
    wmbsJob.getMask()
    outputID = wmbsJob.loadOutputID()

    wmbsJob["fwjr"] = fwkJobReport

    outputMap = self.getOutputMapAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    jobType = self.getJobTypeAction.execute(
        jobID=jobID,
        conn=self.getDBConn(),
        transaction=self.existingTransaction())

    fileList = fwkJobReport.getAllFiles()

    # Run the LogCollect association as a pre-pass, before any bookkeeping
    # list is touched, so a failure cannot leave half-registered output
    # behind for a job that will be tried again.
    if jobType == "LogCollect":
        for fwjrFile in fileList:
            try:
                # associate logArchived file for parent jobs on wmstats
                # assuming fileList is length is 1.
                self.associateLogCollectToParentJobsInWMStats(
                    fwkJobReport, fwjrFile["lfn"],
                    fwkJobReport.getTaskName())
            except Exception as ex:
                logging.error(
                    "Error occurred: associating log collect location, will try again\n %s"
                    % str(ex))
                return

    for fwjrFile in fileList:
        wmbsFile = self.addFileToWMBS(jobType,
                                      fwjrFile,
                                      wmbsJob["mask"],
                                      jobID=jobID,
                                      task=fwkJobReport.getTaskName())
        merged = fwjrFile['merged']
        moduleLabel = fwjrFile["module_label"]

        if merged:
            self.mergedOutputFiles.append(wmbsFile)

        self.filesetAssoc.append({
            "lfn": wmbsFile["lfn"],
            "fileset": outputID
        })
        outputFilesets = self.outputFilesetsForJob(outputMap, merged,
                                                   moduleLabel)
        for outputFileset in outputFilesets:
            self.filesetAssoc.append({
                "lfn": wmbsFile["lfn"],
                "fileset": outputFileset
            })

    # Check if the job had any skipped files
    # Put them in ACDC containers, we assume full file processing
    # No job masks
    skippedFiles = fwkJobReport.getAllSkippedFiles()
    if skippedFiles:
        self.jobsWithSkippedFiles[jobID] = skippedFiles

    # Only save once job is done, and we're sure we made it through okay
    self._mapLocation(wmbsJob['fwjr'])
    self.listOfJobsToSave.append(wmbsJob)

    return
def __call__(self, filesetToProcess):
    """
    The algorithm itself

    Feed filesetToProcess with the output files of finished jobs.

    The fileset name is split on ':'; field 0 is used as the task name and
    field 3 as the first run to consider ('None' disables the feeding).
    The run list is fetched from the Tier-0 service, retrying up to
    self.maxRetries times.  For every run at or after the start run whose
    status is CloseOutExport, Complete or CloseOutT1Skimming, the matching
    per-run fileset and its subscription are looked up; when all jobs of
    the subscription succeeded, the files listed in each job report are
    created or loaded in WMBS and added to filesetToProcess.

    Finally the fileset's last update time is refreshed once it is older
    than self.reopenTime hours, and the fileset is closed once it is older
    than self.purgeTime hours.

    :param filesetToProcess: the WMBS fileset to feed.
    :return: None; returns early when the run list cannot be fetched.
    """
    def loadOrCreate(lfn):
        """Return the WMBS File for lfn, creating it if it does not exist."""
        wmbsFile = File(lfn=lfn, locations='caf.cern.ch')
        LOCK.acquire()
        try:
            # "== False" kept on purpose: only an explicit False creates
            if wmbsFile.exists() == False:
                wmbsFile.create()
            else:
                wmbsFile.loadData()
        finally:
            # always hand the lock back, even when the database call raises
            LOCK.release()
        return wmbsFile

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv('DIALECT'), os.getenv("DBSOCK"))

    myThread = threading.currentThread()

    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    lastFileset = daofactory(classname="Fileset.ListFilesetByTask")
    lastWorkflow = daofactory(classname="Workflow.LoadFromTask")
    subsRun = daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
    successJob = daofactory(classname="Subscriptions.SucceededJobs")
    allJob = daofactory(classname="Subscriptions.Jobs")
    fileInFileset = daofactory(classname="Files.InFileset")

    # Get the start Run if asked
    nameFields = filesetToProcess.name.split(":")
    startRun = nameFields[3]
    crabTask = nameFields[0]

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % crabTask)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get("/tier0/runs")
        except Exception:
            logging.debug("T0Reader call error...")
            # ">=" so that a maxRetries below 1 cannot loop forever
            if tries >= self.maxRetries:
                return
            tries += 1
            continue

        logging.debug("T0ASTRunChain feeder queries done ...")
        now = time.time()
        break

    finishedStatuses = ('CloseOutExport', 'Complete', 'CloseOutT1Skimming')

    for listRun in requestResult[0]:
        if startRun == 'None' or int(listRun['run']) < int(startRun):
            continue
        if listRun['status'] not in finishedStatuses:
            continue

        crabWorkflow = lastWorkflow.execute(task=crabTask)
        crabFileset = lastFileset.execute(task=crabTask)

        # Same name as the task fileset, with the run number substituted
        # after '-Run' in the first ':'-separated field
        taskNameFields = crabFileset[0]["name"].split(':')
        crabrunFileset = Fileset(
            name=taskNameFields[0].split('-Run')[0]
            + '-Run' + str(listRun['run']) + ":"
            + ":".join(taskNameFields[1:]))

        if not (crabrunFileset.exists() > 0):
            continue
        crabrunFileset.load()

        currSubs = subsRun.execute(crabrunFileset.id, crabWorkflow[0]['id'])
        if not currSubs:
            continue

        listsuccessJob = successJob.execute(subscription=currSubs['id'])
        listallJob = allJob.execute(subscription=currSubs['id'])
        # Only harvest once every job of the subscription has succeeded
        if len(listsuccessJob) != len(listallJob):
            continue

        for currid in listsuccessJob:
            currjob = Job(id=currid)
            currjob.load()

            logging.debug("Reading FJR %s" % currjob['fwjr_path'])
            jobReport = readJobReport(currjob['fwjr_path'])

            if not len(jobReport) > 0:
                # Missed fjr - Try next time
                # NOTE(review): the early exit is tied to an empty report
                # list here; confirm a report without files should not
                # stop the loop as well.
                break

            report = jobReport[0]
            if report.files:
                for newFile in report.files:
                    logging.debug("Output path %s" % newFile['LFN'])
                    newFileToAdd = loadOrCreate(newFile['LFN'])

                    listFile = fileInFileset.execute(filesetToProcess.id)
                    if {'fileid': newFileToAdd['id']} not in listFile:
                        filesetToProcess.addFile(newFileToAdd)
                        filesetToProcess.setLastUpdate(now)
                        filesetToProcess.commit()
                        logging.debug(
                            "new file created/loaded and added by T0ASTRunChain...")

            elif report.analysisFiles:
                for newFile in report.analysisFiles:
                    logging.debug("Ouput path %s " % newFile['LFN'])
                    newFileToAdd = loadOrCreate(newFile['LFN'])

                    listFile = fileInFileset.execute(filesetToProcess.id)
                    if {'fileid': newFileToAdd['id']} not in listFile:
                        logging.debug(
                            "%s loaded and added by T0ASTRunChain" % newFile['LFN'])
                        filesetToProcess.addFile(newFileToAdd)
                        filesetToProcess.setLastUpdate(now)
                        filesetToProcess.commit()
                        logging.debug(
                            "new file created/loaded added by T0ASTRunChain...")

    # Commit the fileset
    logging.debug("Test purge in T0ASTRunChain ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    # For re-opned fileset or empty, try until the purge time
    if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
        filesetToProcess.setLastUpdate(time.time())
        filesetToProcess.commit()

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")
def testH_PauseAlgo(self):
    """
    _testH_PauseAlgo_

    Test the pause algorithm, note that given pauseCount = n, the
    job will run first n + 1 times before being paused.
    After that it will be paused each n times
    """
    firstGroup = self.createTestJobGroup(nJobs=self.nJobs)
    # adding a 2nd job group
    secondGroup = self.createTestJobGroup(nJobs=self.nJobs)

    config = self.getConfig()
    config.RetryManager.plugins = {'Processing': 'PauseAlgo'}
    config.RetryManager.section_("PauseAlgo")
    config.RetryManager.PauseAlgo.section_("Processing")
    config.RetryManager.PauseAlgo.Processing.coolOffTime = dict(create=20,
                                                                 submit=20,
                                                                 job=20)
    config.RetryManager.PauseAlgo.Processing.pauseCount = 2

    changer = ChangeState(config)

    def advance(jobs, transitions):
        """Apply each (newState, oldState) transition to jobs, in order."""
        for newState, oldState in transitions:
            changer.propagate(jobs, newState, oldState)

    def backdate(jobs, seconds):
        """Set the state time of every job to the given seconds in the past."""
        for job in jobs:
            self.setJobTime.execute(jobID=job["id"],
                                    stateTime=int(time.time()) - seconds)

    def expectJobs(state, count):
        """Assert that exactly count jobs are currently in state."""
        self.assertEqual(len(self.getJobs.execute(state=state)), count)

    failOnce = [('executing', 'created'),
                ('jobfailed', 'executing'),
                ('jobcooloff', 'jobfailed')]
    # same as failOnce, with the jobfailed transition issued twice
    failRepeated = [('executing', 'created'),
                    ('jobfailed', 'executing'),
                    ('jobfailed', 'executing'),
                    ('jobcooloff', 'jobfailed')]
    failViaComplete = [('executing', 'created'),
                       ('complete', 'executing'),
                       ('jobfailed', 'complete'),
                       ('jobcooloff', 'jobfailed')]

    advance(firstGroup.jobs,
            [('created', 'new')] + failOnce + [('created', 'jobcooloff')] + failOnce)

    testRetryManager = RetryManagerPoller(config)
    testRetryManager.setup(None)

    report = Report()

    # Making sure that jobs are not created ahead of time
    backdate(firstGroup.jobs, 15)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be retried
    backdate(firstGroup.jobs, 25)
    # Make sure that the plugin allowed them to go back to created state
    testRetryManager.algorithm(None)
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failOnce)

    # Make sure that no change happens before timeout
    backdate(firstGroup.jobs, 75)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be paused
    backdate(firstGroup.jobs, 85)
    # Make sure that the plugin pauses them
    testRetryManager.algorithm(None)
    expectJobs('jobpaused', self.nJobs)

    # Emulating ops retrying the job
    advance(firstGroup.jobs, [('created', 'jobpaused')])
    # Making sure it did the right thing
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failRepeated)

    backdate(firstGroup.jobs, 175)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be retried
    backdate(firstGroup.jobs, 185)
    # Make sure that the plugin allowed them to go back to created state
    testRetryManager.algorithm(None)
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failRepeated)

    backdate(firstGroup.jobs, 315)
    testRetryManager.algorithm(None)
    expectJobs('jobcooloff', self.nJobs)

    backdate(firstGroup.jobs, 325)
    # This time the plugin is expected to pause the jobs
    testRetryManager.algorithm(None)
    expectJobs('jobpaused', self.nJobs)

    # a configurable retry count per job type {jobExitCodeA: pauseCountB}
    config.RetryManager.PauseAlgo.Processing.retryErrorCodes = {8020: 1,
                                                                12345: 1,
                                                                5555: 2}

    testRetryManager2 = RetryManagerPoller(config)
    testRetryManager2.algorithm()

    fwjrPath = os.path.join(WMCore.WMBase.getTestBase(),
                            "WMComponent_t/JobAccountant_t",
                            "fwjrs/badBackfillJobReport.pkl")
    report.load(fwjrPath)
    for job in secondGroup.jobs:
        job['fwjr'] = report
        job['retry_count'] = 0
        reportName = "Report.%i.pkl" % job['retry_count']
        report.save(os.path.join(job['cache_dir'], reportName))

    # fail the jobs
    advance(secondGroup.jobs, [('created', 'new')] + failViaComplete)

    # Giving time so they can be paused
    backdate(secondGroup.jobs, 85)

    # Make sure that the plugin sent those jobs to the next state:
    testRetryManager2.algorithm()
    # job exit code is 8020, so it is supposed to be retried one time.
    # Meaning, that here we should have 10 jobs (from the first part of the test) in jobpaused
    # and 10 jobs in created state
    expectJobs('created', self.nJobs)
    expectJobs('jobpaused', self.nJobs)

    # save a second job report - with a retry count = 1
    for job in secondGroup.jobs:
        j = Job(id=job['id'])
        j.load()
        j['retry_count'] = 1
        self.assertEqual(j['retry_count'], 1)
        reportName = "Report.%i.pkl" % j['retry_count']
        report.save(os.path.join(j['cache_dir'], reportName))

    # Fail them out again
    advance(secondGroup.jobs, failViaComplete)

    backdate(secondGroup.jobs, 175)

    # not sure if this check is needed:
    expectJobs('jobcooloff', self.nJobs)

    # Giving time so they can be paused
    backdate(secondGroup.jobs, 85)

    # Make sure that the plugin sent those jobs to paused state:
    testRetryManager2.algorithm(None)
    # And again, in total, there should be 10+10=20 jobs in jobpaused
    expectJobs('jobpaused', self.nJobs * 2)

    return
def testH_PauseAlgo(self):
    """
    _testH_PauseAlgo_

    Test the pause algorithm, note that given pauseCount = n, the
    job will run first n + 1 times before being paused.
    After that it will be paused each n times
    """
    firstGroup = self.createTestJobGroup(nJobs=self.nJobs)
    # adding a 2nd job group
    secondGroup = self.createTestJobGroup(nJobs=self.nJobs)

    config = self.getConfig()
    config.RetryManager.plugins = {'Processing': 'PauseAlgo'}
    config.RetryManager.section_("PauseAlgo")
    config.RetryManager.PauseAlgo.section_("Processing")
    config.RetryManager.PauseAlgo.Processing.coolOffTime = dict(create=20,
                                                                 submit=20,
                                                                 job=20)
    config.RetryManager.PauseAlgo.Processing.pauseCount = 2

    changer = ChangeState(config)

    def advance(jobs, transitions):
        """Apply each (newState, oldState) transition to jobs, in order."""
        for newState, oldState in transitions:
            changer.propagate(jobs, newState, oldState)

    def backdate(jobs, seconds):
        """Set the state time of every job to the given seconds in the past."""
        for job in jobs:
            self.setJobTime.execute(jobID=job["id"],
                                    stateTime=int(time.time()) - seconds)

    def expectJobs(state, count):
        """Assert that exactly count jobs are currently in state."""
        self.assertEqual(len(self.getJobs.execute(state=state)), count)

    failOnce = [('executing', 'created'),
                ('jobfailed', 'executing'),
                ('jobcooloff', 'jobfailed')]
    # same as failOnce, with the jobfailed transition issued twice
    failRepeated = [('executing', 'created'),
                    ('jobfailed', 'executing'),
                    ('jobfailed', 'executing'),
                    ('jobcooloff', 'jobfailed')]
    failViaComplete = [('executing', 'created'),
                       ('complete', 'executing'),
                       ('jobfailed', 'complete'),
                       ('jobcooloff', 'jobfailed')]

    advance(firstGroup.jobs,
            [('created', 'new')] + failOnce + [('created', 'jobcooloff')] + failOnce)

    testRetryManager = RetryManagerPoller(config)
    testRetryManager.setup(None)

    report = Report()

    # Making sure that jobs are not created ahead of time
    backdate(firstGroup.jobs, 15)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be retried
    backdate(firstGroup.jobs, 25)
    # Make sure that the plugin allowed them to go back to created state
    testRetryManager.algorithm(None)
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failOnce)

    # Make sure that no change happens before timeout
    backdate(firstGroup.jobs, 75)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be paused
    backdate(firstGroup.jobs, 85)
    # Make sure that the plugin pauses them
    testRetryManager.algorithm(None)
    expectJobs('jobpaused', self.nJobs)

    # Emulating ops retrying the job
    advance(firstGroup.jobs, [('created', 'jobpaused')])
    # Making sure it did the right thing
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failRepeated)

    backdate(firstGroup.jobs, 175)
    testRetryManager.algorithm(None)
    expectJobs('JobCoolOff', self.nJobs)

    # Giving time so they can be retried
    backdate(firstGroup.jobs, 185)
    # Make sure that the plugin allowed them to go back to created state
    testRetryManager.algorithm(None)
    expectJobs('created', self.nJobs)

    # Fail them out again
    advance(firstGroup.jobs, failRepeated)

    backdate(firstGroup.jobs, 315)
    testRetryManager.algorithm(None)
    expectJobs('jobcooloff', self.nJobs)

    backdate(firstGroup.jobs, 325)
    # This time the plugin is expected to pause the jobs
    testRetryManager.algorithm(None)
    expectJobs('jobpaused', self.nJobs)

    # a configurable retry count per job type {jobExitCodeA: pauseCountB}
    config.RetryManager.PauseAlgo.Processing.retryErrorCodes = {8020: 1,
                                                                12345: 1,
                                                                5555: 2}

    testRetryManager2 = RetryManagerPoller(config)
    testRetryManager2.algorithm()

    fwjrPath = os.path.join(WMCore.WMBase.getTestBase(),
                            "WMComponent_t/JobAccountant_t",
                            "fwjrs/badBackfillJobReport.pkl")
    report.load(fwjrPath)
    for job in secondGroup.jobs:
        job['fwjr'] = report
        job['retry_count'] = 0
        reportName = "Report.%i.pkl" % job['retry_count']
        report.save(os.path.join(job['cache_dir'], reportName))

    # fail the jobs
    advance(secondGroup.jobs, [('created', 'new')] + failViaComplete)

    # Giving time so they can be paused
    backdate(secondGroup.jobs, 85)

    # Make sure that the plugin sent those jobs to the next state:
    testRetryManager2.algorithm()
    # job exit code is 8020, so it is supposed to be retried one time.
    # Meaning, that here we should have 10 jobs (from the first part of the test) in jobpaused
    # and 10 jobs in created state
    expectJobs('created', self.nJobs)
    expectJobs('jobpaused', self.nJobs)

    # save a second job report - with a retry count = 1
    for job in secondGroup.jobs:
        j = Job(id=job['id'])
        j.load()
        j['retry_count'] = 1
        self.assertEqual(j['retry_count'], 1)
        reportName = "Report.%i.pkl" % j['retry_count']
        report.save(os.path.join(j['cache_dir'], reportName))

    # Fail them out again
    advance(secondGroup.jobs, failViaComplete)

    backdate(secondGroup.jobs, 175)

    # not sure if this check is needed:
    expectJobs('jobcooloff', self.nJobs)

    # Giving time so they can be paused
    backdate(secondGroup.jobs, 85)

    # Make sure that the plugin sent those jobs to paused state:
    testRetryManager2.algorithm(None)
    # And again, in total, there should be 10+10=20 jobs in jobpaused
    expectJobs('jobpaused', self.nJobs * 2)

    return
def __call__(self, filesetToProcess):
    """
    The algorithm itself

    Feed filesetToProcess with the output files of finished jobs.

    The fileset name is split on ':'; field 0 is used as the task name and
    field 3 as the first run to consider ('None' disables the feeding).
    The run list is fetched from the Tier-0 service, retrying up to
    self.maxRetries times.  For every run at or after the start run whose
    status is CloseOutExport, Complete or CloseOutT1Skimming, the matching
    per-run fileset and its subscription are looked up; when all jobs of
    the subscription succeeded, the files listed in each job report are
    created or loaded in WMBS and added to filesetToProcess.

    Finally the fileset's last update time is refreshed once it is older
    than self.reopenTime hours, and the fileset is closed once it is older
    than self.purgeTime hours.

    :param filesetToProcess: the WMBS fileset to feed.
    :return: None; returns early when the run list cannot be fetched.
    """
    def loadOrCreate(lfn):
        """Return the WMBS File for lfn, creating it if it does not exist."""
        wmbsFile = File(lfn=lfn, locations='caf.cern.ch')
        LOCK.acquire()
        try:
            # "== False" kept on purpose: only an explicit False creates
            if wmbsFile.exists() == False:
                wmbsFile.create()
            else:
                wmbsFile.loadData()
        finally:
            # always hand the lock back, even when the database call raises
            LOCK.release()
        return wmbsFile

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv('DIALECT'), os.getenv("DBSOCK"))

    myThread = threading.currentThread()

    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    lastFileset = daofactory(classname="Fileset.ListFilesetByTask")
    lastWorkflow = daofactory(classname="Workflow.LoadFromTask")
    subsRun = daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
    successJob = daofactory(classname="Subscriptions.SucceededJobs")
    allJob = daofactory(classname="Subscriptions.Jobs")
    fileInFileset = daofactory(classname="Files.InFileset")

    # Get the start Run if asked
    nameFields = filesetToProcess.name.split(":")
    startRun = nameFields[3]
    crabTask = nameFields[0]

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % crabTask)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get("/tier0/runs")
        except Exception:
            logging.debug("T0Reader call error...")
            # ">=" so that a maxRetries below 1 cannot loop forever
            if tries >= self.maxRetries:
                return
            tries += 1
            continue

        logging.debug("T0ASTRunChain feeder queries done ...")
        now = time.time()
        break

    finishedStatuses = ('CloseOutExport', 'Complete', 'CloseOutT1Skimming')

    for listRun in requestResult[0]:
        if startRun == 'None' or int(listRun['run']) < int(startRun):
            continue
        if listRun['status'] not in finishedStatuses:
            continue

        crabWorkflow = lastWorkflow.execute(task=crabTask)
        crabFileset = lastFileset.execute(task=crabTask)

        # Same name as the task fileset, with the run number substituted
        # after '-Run' in the first ':'-separated field
        taskNameFields = crabFileset[0]["name"].split(':')
        crabrunFileset = Fileset(
            name=taskNameFields[0].split('-Run')[0]
            + '-Run' + str(listRun['run']) + ":"
            + ":".join(taskNameFields[1:]))

        if not (crabrunFileset.exists() > 0):
            continue
        crabrunFileset.load()

        currSubs = subsRun.execute(crabrunFileset.id, crabWorkflow[0]['id'])
        if not currSubs:
            continue

        listsuccessJob = successJob.execute(subscription=currSubs['id'])
        listallJob = allJob.execute(subscription=currSubs['id'])
        # Only harvest once every job of the subscription has succeeded
        if len(listsuccessJob) != len(listallJob):
            continue

        for currid in listsuccessJob:
            currjob = Job(id=currid)
            currjob.load()

            logging.debug("Reading FJR %s" % currjob['fwjr_path'])
            jobReport = readJobReport(currjob['fwjr_path'])

            if not len(jobReport) > 0:
                # Missed fjr - Try next time
                # NOTE(review): the early exit is tied to an empty report
                # list here; confirm a report without files should not
                # stop the loop as well.
                break

            report = jobReport[0]
            if report.files:
                for newFile in report.files:
                    logging.debug("Output path %s" % newFile['LFN'])
                    newFileToAdd = loadOrCreate(newFile['LFN'])

                    listFile = fileInFileset.execute(filesetToProcess.id)
                    if {'fileid': newFileToAdd['id']} not in listFile:
                        filesetToProcess.addFile(newFileToAdd)
                        filesetToProcess.setLastUpdate(now)
                        filesetToProcess.commit()
                        logging.debug(
                            "new file created/loaded and added by T0ASTRunChain...")

            elif report.analysisFiles:
                for newFile in report.analysisFiles:
                    logging.debug("Ouput path %s " % newFile['LFN'])
                    newFileToAdd = loadOrCreate(newFile['LFN'])

                    listFile = fileInFileset.execute(filesetToProcess.id)
                    if {'fileid': newFileToAdd['id']} not in listFile:
                        logging.debug(
                            "%s loaded and added by T0ASTRunChain" % newFile['LFN'])
                        filesetToProcess.addFile(newFileToAdd)
                        filesetToProcess.setLastUpdate(now)
                        filesetToProcess.commit()
                        logging.debug(
                            "new file created/loaded added by T0ASTRunChain...")

    # Commit the fileset
    logging.debug("Test purge in T0ASTRunChain ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    # For re-opned fileset or empty, try until the purge time
    if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
        filesetToProcess.setLastUpdate(time.time())
        filesetToProcess.commit()

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")