def testCreateDeleteExistsNoFiles(self):
    """
    _testCreateDeleteExistsNoFiles_

    Create and then delete a job without adding any input files to it.
    The job's exists() method is checked three times: before create()
    (must be falsy), after create() (must be truthy) and after delete()
    (must be falsy again).
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job(name="TestJob")
    self.assertFalse(testJob.exists(),
                     "ERROR: Job exists before it was created")

    testJob.create(group=testJobGroup)
    # A comparison such as `exists() >= 0` also holds for False (False == 0)
    # and therefore can never fail; require a truthy result instead.
    self.assertTrue(testJob.exists(),
                    "ERROR: Job does not exist after it was created")

    testJob.delete()
    self.assertFalse(testJob.exists(),
                     "ERROR: Job exists after it was delete")
    return
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Follow JobGroup.exists() through the life cycle of a job group: it
    must be false before create(), true after create() and false again
    after delete().  The subscription, fileset and workflow built for the
    test are deleted at the end.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = WMBSFileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    self.assertFalse(jobGroup.exists())

    jobGroup.create()
    self.assertTrue(jobGroup.exists())

    jobGroup.delete()
    self.assertFalse(jobGroup.exists())

    # Remove the supporting objects in reverse order of creation.
    for leftover in (subscription, fileset, workflow):
        leftover.delete()
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Build a StoreResults workload from the factory's test arguments,
    install it into WMBS through WMBSHelper and check the output map of the
    resulting workflow as well as the type and splitting algorithm of the
    top level subscription.
    """
    workloadArgs = StoreResultsWorkloadFactory.getTestArguments()
    workloadArgs.update({'CmsPath': "/uscmst1/prod/sw/cms"})

    workload = StoreResultsWorkloadFactory().factoryWorkloadConstruction("TestWorkload",
                                                                         workloadArgs)

    helper = WMBSHelper(workload, "StoreResults", "SomeBlock",
                        cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask,
                                      helper.topLevelFileset)

    storeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/StoreResults")
    storeWorkflow.load()

    self.assertEqual(len(storeWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    for outputModule in ["Merged"]:
        outputEntry = storeWorkflow.outputMap[outputModule][0]
        mergedFileset = outputEntry["merged_output_fileset"]
        unmergedFileset = outputEntry["output_fileset"]

        mergedFileset.loadData()
        unmergedFileset.loadData()

        # Both filesets are checked against the same "merged-" name.
        expectedName = "/TestWorkload/StoreResults/merged-%s" % outputModule
        self.assertEqual(mergedFileset.name, expectedName,
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name, expectedName,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedFileset.name)

    logArchiveEntry = storeWorkflow.outputMap["logArchive"][0]
    mergedLogArchive = logArchiveEntry["merged_output_fileset"]
    unmergedLogArchive = logArchiveEntry["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()

    self.assertEqual(mergedLogArchive.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchive.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    inputFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    inputFileset.loadData()

    topSubscription = Subscription(fileset=inputFileset,
                                   workflow=storeWorkflow)
    topSubscription.loadData()

    self.assertEqual(topSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(topSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def createSingleJobWorkflow(self):
    """
    Build a workflow containing a single job that has two input files.

    The workflow, both files and the job are stored on the instance as
    testWorkflow, testFileA, testFileB and testJob.
    """
    self.testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                 name="wf001", task="Test")
    self.testWorkflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=self.testWorkflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Each file gets run 1 with a single, distinct lumi section.
    self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    self.testFileA.addRun(Run(1, 45))
    self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    self.testFileB.addRun(Run(1, 46))
    self.testFileA.create()
    self.testFileB.create()

    self.testJob = Job(name="TestJob",
                       files=[self.testFileA, self.testFileB])
    self.testJob.create(group=jobGroup)
    self.testJob.associateFiles()
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Test the ListRunningJobs DAO.  Three jobs are created and moved to the
    "executing", "complete" and "new" states; the DAO is expected to return
    exactly two of them, the executing one and the new one, with matching
    state and couch record.
    """
    workflow = Workflow(spec=makeUUID(), owner="Steve",
                        name=makeUUID(), task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type="Processing")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # One job per target state, created in this order.
    jobs = []
    for targetState in ("executing", "complete", "new"):
        job = Job(name=makeUUID(), files=[])
        job["couch_record"] = makeUUID()
        job.create(group=jobGroup)
        job["state"] = targetState
        jobs.append(job)
    executingJob, _completeJob, newJob = jobs

    stateChanger = self.daoFactory(classname="Jobs.ChangeState")
    stateChanger.execute(jobs=jobs)

    runningLister = self.daoFactory(classname="Monitoring.ListRunningJobs")
    runningJobs = runningLister.execute()

    assert len(runningJobs) == 2, \
        "Error: Wrong number of running jobs returned."

    for runningJob in runningJobs:
        if runningJob["job_name"] == executingJob["name"]:
            expectedJob = executingJob
        else:
            # Anything that is not the executing job must be the new one.
            assert runningJob["job_name"] == newJob["name"], \
                "Error: Running job has wrong name."
            expectedJob = newJob

        assert runningJob["state"] == expectedJob["state"], \
            "Error: Running job has wrong state."
        assert runningJob["couch_record"] == expectedJob["couch_record"], \
            "Error: Running job has wrong couch record."

    return
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Create a JobGroup inside an explicitly started database transaction,
    roll the transaction back and verify through exists() that the JobGroup
    is no longer in the database.  The supporting subscription, fileset and
    workflow are deleted at the end.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = WMBSFileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    self.assertFalse(testJobGroup.exists(),
                     "ERROR: Job group exists before it was created")

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJobGroup.create()
    # A comparison such as `exists() >= 0` also holds for False (False == 0)
    # and therefore can never fail; require a truthy result instead.
    self.assertTrue(testJobGroup.exists(),
                    "ERROR: Job group does not exist after it was created")

    myThread.transaction.rollback()
    self.assertFalse(testJobGroup.exists(),
                     "ERROR: Job group exists after transaction was rolled back.")

    testSubscription.delete()
    testFileset.delete()
    testWorkflow.delete()
    return
def createWorkflow(self, task):
    """
    Register a task in WMBS by creating an open fileset and a workflow
    for it.

    The workflow is remembered in self.workflowDict under the task name and
    the spec URL is written to the task's input.WMBS section.  When the
    task's input reference carries an outputModule attribute, the fileset
    created here is added as that output module's output on the workflow
    already registered for the task named in the input step path.

    Returns the tuple (workflow, fileset).  Raises Exception when the task
    to chain from has no entry in self.workflowDict.
    """
    specURL = self.getWorkflowURL(task)

    inputFileset = Fileset(name=self.getFilesetName(task), is_open=True)
    inputFileset.create()

    workflow = Workflow(spec=specURL, owner=self.owner, dn=self.owner_dn,
                        name=self.getWorkflowName(task), task=task.name())
    workflow.create()
    self.workflowDict[task.name()] = workflow

    # Insert workflow into task
    task.data.input.WMBS.WorkflowSpecURL = specURL

    # A task whose input reference names an output module is fed by another
    # task: look up that task's workflow and register this fileset as its
    # output.
    if hasattr(task.inputReference(), 'outputModule'):
        dummyStepName = task.inputReference().inputStep.split('/')[-1]
        parentTaskName = task.inputReference().inputStep.split('/')[-2]
        outputModule = task.inputReference().outputModule
        if parentTaskName not in self.workflowDict:
            raise Exception('I am being asked to chain output for a task %s which does not yet exist' % parentTaskName)
        parentWorkflow = self.workflowDict[parentTaskName]
        parentWorkflow.addOutput(outputModule, inputFileset)

    logging.info('Registered workflow for step %s' % (task.name()))

    return workflow, inputFileset
def createTestJob(subscriptionType="Merge"):
    """
    _createTestJob_

    Create and return a test job with two input files.  The workflow,
    fileset, subscription (of the requested type) and job group the job
    needs are created along the way.
    """
    workflow = Workflow(spec=makeUUID(), owner="Simon",
                        name=makeUUID(), task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type=subscriptionType)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Two files in run 1, each with its own lumi section.
    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileA.addRun(Run(1, 45))
    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileB.addRun(Run(1, 46))
    fileA.create()
    fileB.create()

    job = Job(name=makeUUID(), files=[fileA, fileB])
    job["couch_record"] = "somecouchrecord"
    job["location"] = "test.site.ch"
    job.create(group=jobGroup)
    job.associateFiles()

    return job
def testDifferentSubscritionIDs(self):
    """
    _testDifferentSubscriptionIDs_

    Make sure that the merge splitting still runs if the subscription ID is
    not equal to the workflow ID.  A dummy workflow with two subscriptions
    is committed before stuffWMBS() is called; the splitter is then run on
    self.mergeSubscription and must yield one job group with two jobs.
    """
    currentThread = threading.currentThread()
    currentThread.transaction.begin()

    extraWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                             owner="Steve", task="Test2")
    extraWorkflow.create()

    extraFileset = Fileset(name="dummyFileset")
    extraFileset.create()

    # Both subscriptions are built before either one is created.
    extraSubscriptions = [Subscription(fileset=extraFileset,
                                       workflow=extraWorkflow,
                                       split_algo="ParentlessMergeBySize")
                          for _ in range(2)]
    for extraSubscription in extraSubscriptions:
        extraSubscription.create()

    currentThread.transaction.commit()

    self.stuffWMBS()

    splitterFactory = SplitterFactory()
    mergeJobFactory = splitterFactory(package="WMCore.WMBS",
                                      subscription=self.mergeSubscription)
    jobGroups = mergeJobFactory(min_merge_size=4097,
                                max_merge_size=99999999,
                                max_merge_events=999999999,
                                merge_across_runs=False)

    self.assertEqual(len(jobGroups), 1)
    self.assertEqual(len(jobGroups[0].jobs), 2)
    return
def atestD_Timing(self):
    """
    _Timing_

    This is to see how fast things go.

    The method returns immediately, so the timing code below the first
    statement is currently unreachable.
    """
    # Disabled: everything after this return is never executed.
    return

    currentThread = threading.currentThread()

    workflowName = makeUUID()
    config = self.getConfig()
    jobList = self.createGiantJobSet(name=workflowName, config=config,
                                     nSubs=10, nJobs=1000, nFiles=10)

    archiver = TaskArchiverPoller(config=config)

    startTime = time.time()
    archiver.algorithm()
    stopTime = time.time()

    # All of these tables are expected to be empty after the poller ran.
    rows = currentThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
    self.assertEqual(len(rows), 0)
    rows = currentThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
    self.assertEqual(len(rows), 0)
    rows = currentThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
    self.assertEqual(len(rows), 0)
    rows = currentThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
    self.assertEqual(len(rows), 0)

    firstFileset = Fileset(id=1)
    self.assertEqual(firstFileset.exists(), False)

    logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
    """
    _createTestSubscription_

    Create a "MinFileBased" Processing subscription over a fileset holding
    nFiles files for each of nSites sites (capped at self.nSites).  The
    fileset is marked as not open when closeFileset is set.  Returns the
    subscription.
    """
    nSites = min(nSites, self.nSites)

    fileset = Fileset(name="TestFileset")
    fileset.create()

    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    # nFiles files per site, each located at that site only
    for siteIndex in range(nSites):
        siteName = "site%i.cern.ch" % siteIndex
        for _ in range(nFiles):
            siteFile = File(makeUUID(), size=1024, events=100,
                            locations={siteName})
            siteFile.create()
            fileset.addFile(siteFile)
    fileset.commit()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="MinFileBased",
                                type="Processing")
    subscription.create()

    if closeFileset:
        fileset.markOpen(isOpen=False)

    return subscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create an "EventAwareLumiBased" Processing subscription for
    self.testWorkflow over a new fileset.  nFiles files are made through
    self.createFile() at "somese.cern.ch"; with twoSites set, a second
    batch of nFiles files is added at "otherse.cern.ch".  Returns the
    subscription.
    """
    baseName = makeUUID()

    fileset = Fileset(name=baseName)
    fileset.create()

    # (lfn pattern, storage element) for every batch of files to create
    batches = [("%s_%i", "somese.cern.ch")]
    if twoSites:
        batches.append(("%s_%i_2", "otherse.cern.ch"))

    for lfnPattern, seName in batches:
        for index in range(nFiles):
            batchFile = self.createFile(lfnPattern % (baseName, index),
                                        nEventsPerFile, index,
                                        lumisPerFile, seName)
            batchFile.create()
            fileset.addFile(batchFile)
    fileset.commit()

    subscription = Subscription(fileset=fileset,
                                workflow=self.testWorkflow,
                                split_algo="EventAwareLumiBased",
                                type="Processing")
    subscription.create()

    return subscription
def setupRepackWorkflow(self):
    """
    _setupRepackWorkflow_

    Populate WMBS with a repack-like workflow: one top level Repack
    subscription, one subscription per merge task and one cleanup
    subscription.  The request is also inserted through
    self.requestDBWriter and set to 'Closed'.  self.stateMap records which
    filesets belong to the 'Merge' and 'Processing Done' states, and
    self.orderedStates lists those states in order.
    """
    workflowName = 'Repack_Run481516_StreamZ'
    mergeTasks = ['RepackMergewrite_QuadElectron_RAW',
                  'RepackMergewrite_TriPhoton_RAW',
                  'RepackMergewrite_SingleNeutrino_RAW']

    self.stateMap = {'Merge': [], 'Processing Done': []}
    self.orderedStates = ['Merge', 'Processing Done']

    # Populate WMStats
    self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
    self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

    # Create a wmspec in disk
    workload = newWorkload(workflowName)
    repackTask = workload.newTask('Repack')
    for mergeTask in mergeTasks:
        repackTask.addTask(mergeTask)
    repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

    specPath = os.path.join(self.testDir, 'Repack.pkl')
    workload.save(specPath)

    # Populate WMBS; every workflow shares these constructor arguments
    commonArgs = dict(spec=specPath, owner='ItsAMeMario',
                      name=workflowName, wfType='tier0')

    streamerFileset = Fileset(name='TestStreamerFileset')
    streamerFileset.create()

    repackWorkflow = Workflow(task='/%s/Repack' % workflowName, **commonArgs)
    repackWorkflow.create()

    Subscription(streamerFileset, repackWorkflow).create()
    self.stateMap['Merge'].append(streamerFileset)

    for mergeTask in mergeTasks:
        mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, mergeTask),
                                 **commonArgs)
        mergeWorkflow.create()

        mergeInput = Fileset(name='TestUnmergedFileset%s' % mergeTask)
        mergeInput.create()

        Subscription(mergeInput, mergeWorkflow).create()
        self.stateMap['Processing Done'].append(mergeInput)

    cleanupWorkflow = Workflow(task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
                               **commonArgs)
    cleanupWorkflow.create()

    cleanupInput = Fileset(name='TestUnmergedFilesetToCleanup')
    cleanupInput.create()

    Subscription(cleanupInput, cleanupWorkflow).create()

    return
def testParentageByJob(self):
    """
    _testParentageByJob_

    Tests the DAO that assigns parentage by job: a job is given two input
    files, Files.SetParentageByJob is executed for a child file, and the
    child is then reloaded to check that exactly those two files are its
    parents.
    """
    workflow = Workflow(spec='hello', owner="mnorman",
                        name="wf001", task="basicWorkload/Production")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type="Processing", split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    parentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                   events=20, checksums={'cksum': 1})
    parentA.addRun(Run(1, 45))
    parentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                   events=20, checksums={'cksum': 1})
    parentB.addRun(Run(1, 45))
    parentA.create()
    parentB.create()

    childFile = File(lfn="/this/is/a/lfn", size=1024,
                     events=10, checksums={'cksum': 1})
    childFile.addRun(Run(1, 45))
    childFile.create()

    job = Job()
    job.create(group=jobGroup)
    job.addFile(parentA)
    job.addFile(parentB)
    job.associateFiles()

    setParentage = self.daofactory(classname="Files.SetParentageByJob")
    setParentage.execute(binds={'jobid': job.exists(),
                                'child': childFile['lfn']})

    reloadedChild = File(id=childFile["id"])
    reloadedChild.loadData(parentage=1)

    # Tick every loaded parent off the expected list; nothing may be left.
    expectedParents = [parentA, parentB]
    for loadedParent in reloadedChild["parents"]:
        self.assertTrue(loadedParent in expectedParents,
                        "ERROR: Unknown parent file")
        expectedParents.remove(loadedParent)

    self.assertEqual(len(expectedParents), 0,
                     "ERROR: Some parents are missing")
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    taskType='Processing', name=None, wfPrio=1, changeState=None):
    """
    _createJobGroups_

    Create nSubs job groups for submission, all attached to one workflow
    named `name` (a fresh UUID when omitted) with priority wfPrio.  Each
    job group gets its own fileset and "FileBased" subscription of type
    taskType, and is filled through self.makeNJobs().

    changeState is an instance of the ChangeState class; when given, the
    jobs of every created group are propagated to 'created' from 'new'.

    Returns the list of job groups.
    """
    workflowName = makeUUID() if name is None else name

    workflow = Workflow(spec=workloadSpec, owner="tapas", name=workflowName,
                        task="basicWorkload/Production", priority=wfPrio)
    workflow.create()

    createdGroups = []
    for _ in range(nSubs):
        # Fileset and jobs of one subscription share a fresh UUID name
        subName = makeUUID()

        subFileset = Fileset(name=subName)
        subFileset.create()

        subscription = Subscription(fileset=subFileset,
                                    workflow=workflow,
                                    type=taskType,
                                    split_algo="FileBased")
        subscription.create()

        jobGroup = JobGroup(subscription=subscription)
        jobGroup.create()

        self.makeNJobs(name=subName, task=task,
                       nJobs=nJobs,
                       jobGroup=jobGroup,
                       fileset=subFileset,
                       sub=subscription.exists(),
                       site=site)

        subFileset.commit()
        jobGroup.commit()
        createdGroups.append(jobGroup)

    if changeState:
        for jobGroup in createdGroups:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

    return createdGroups
def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Create a new job and commit it to the database.  Start a new
    transaction and delete the job from the database.  Verify that the job
    has been deleted.  After that, roll back the transaction and verify
    that the job is once again in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])
    self.assertFalse(testJob.exists(),
                     "ERROR: Job exists before it was created")

    testJob.create(group=testJobGroup)
    # A comparison such as `exists() >= 0` also holds for False (False == 0)
    # and therefore can never fail; the positive checks in this test
    # require a truthy result instead.
    self.assertTrue(testJob.exists(),
                    "ERROR: Job does not exist after it was created")

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJob.delete()
    self.assertFalse(testJob.exists(),
                     "ERROR: Job exists after it was delete")

    myThread.transaction.rollback()
    self.assertTrue(testJob.exists(),
                    "ERROR: Job does not exist after transaction was rolled back.")
    return
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None):
    """
    Creates a series of jobGroups for submissions

    One workflow is created and nSubs job groups are attached to it, each
    with its own fileset and "FileBased" Processing subscription.  The jobs
    of every group are made by self.makeNJobs().

    :param nSubs: number of subscriptions / job groups to create
    :param nJobs: number of jobs requested from makeNJobs() per group
    :param task: forwarded to makeNJobs()
    :param workloadSpec: spec given to the workflow
    :param site: forwarded to makeNJobs()
    :param bl: forwarded to makeNJobs(); an empty list when omitted
               (presumably a site blacklist -- confirm against makeNJobs)
    :param wl: forwarded to makeNJobs(); an empty list when omitted
               (presumably a site whitelist -- confirm against makeNJobs)
    :return: list of the created job groups
    """
    # Use fresh lists per call: a list used as a default value is shared
    # between all calls and would leak state if a callee mutated it.
    bl = [] if bl is None else bl
    wl = [] if wl is None else wl

    jobGroupList = []

    testWorkflow = Workflow(
        spec=workloadSpec,
        owner="tapas",
        name=makeUUID(),
        task="basicWorkload/Production",
        owner_vogroup="phgroup",
        owner_vorole="cmsrole",
    )
    testWorkflow.create()

    # Create subscriptions
    for _ in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
        )
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(
            name=name,
            task=task,
            nJobs=nJobs,
            jobGroup=testJobGroup,
            fileset=testFileset,
            sub=testSubscription.exists(),
            site=site,
            bl=bl,
            wl=wl,
        )

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    return jobGroupList
def stuffWMBS(self):
    """
    _stuffWMBS_

    Stuff WMBS with one workflow, one input fileset and a subscription
    connecting the two.
    """
    rerecoWorkflow = Workflow(spec='spec.xml',
                              name='ReRecoTest_v0Emulator',
                              task='/ReRecoTest_v0Emulator/Test',
                              priority=10)
    rerecoWorkflow.create()

    sourceFileset = Fileset(name='TestFileset')
    sourceFileset.create()

    Subscription(sourceFileset, rerecoWorkflow).create()
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create nJobs jobs in a single job group.  Every job reads the same
    file, is located at 'malpaquet' and has its cache set to cacheDir.
    Returns the job group.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type="Processing", split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # The single input file shared by all jobs
    sharedFile = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    sharedFile.addRun(Run(10, 12312))
    sharedFile.setLocation('malpaquet')
    sharedFile.create()

    namePrefix = makeUUID()

    for jobIndex in range(nJobs):
        job = Job(name='%s-%i' % (namePrefix, jobIndex))
        job.addFile(sharedFile)
        job['location'] = 'malpaquet'
        job['retry_count'] = 1
        job['retry_max'] = 10
        job.create(jobGroup)
        job.save()
        jobGroup.add(job)

    jobGroup.commit()

    # Set test job caches
    for groupedJob in jobGroup.jobs:
        groupedJob.setCache(cacheDir)

    return jobGroup
def testGetGroupsByJobStateDAO(self):
    """
    _testGetGroupsByJobStateDAO_

    Verify the JobGroup.GetGroupsByJobState DAO: of two job groups holding
    one job each, only the group whose job is in the "complete" state may
    be returned when querying for that state.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = WMBSFileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    completeGroup = JobGroup(subscription=subscription)
    completeGroup.create()
    executingGroup = JobGroup(subscription=subscription)
    executingGroup.create()

    completeJob = Job(name="TestJobA")
    executingJob = Job(name="TestJobB")

    completeGroup.add(completeJob)
    executingGroup.add(executingJob)

    completeGroup.commit()
    executingGroup.commit()

    currentThread = threading.currentThread()
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=currentThread.logger,
                                dbinterface=currentThread.dbi)

    changeState = wmbsDaoFactory(classname="Jobs.ChangeState")
    completeJob["state"] = "complete"
    executingJob["state"] = "executing"
    changeState.execute(jobs=[completeJob, executingJob])

    groupsByState = wmbsDaoFactory(classname="JobGroup.GetGroupsByJobState")
    foundGroups = groupsByState.execute(jobState="complete")

    assert len(foundGroups) == 1, \
        "Error: Wrong number of job groups returned."
    assert foundGroups[0] == completeGroup.id, \
        "Error: Wrong job group returned."

    return
def createLargerTestJobGroup(self, commitFlag=True):
    """
    _createLargerTestJobGroup_

    Build a job group with 102 jobs: "TestJobA1" reading file C,
    "TestJobB1" reading file D and "TestJob0".."TestJob99" all reading
    file C.  The group is committed only when commitFlag is set.  Returns
    the job group.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = WMBSFileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    fileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
    fileC.addRun(Run(10, 12312))
    fileC.setLocation("T2_CH_CERN")
    fileC.setLocation("malpaquet")

    fileD = File(lfn="/this/is/a/lfnD", size=1024, events=10)
    fileD.addRun(Run(10, 12312))
    fileD.setLocation("T2_CH_CERN")
    fileD.setLocation("malpaquet")

    fileC.create()
    fileD.create()

    firstJob = Job(name="TestJobA1")
    firstJob.addFile(fileC)
    secondJob = Job(name="TestJobB1")
    secondJob.addFile(fileD)

    jobGroup.add(firstJob)
    jobGroup.add(secondJob)

    # Pad the group with one hundred more jobs on file C
    for jobIndex in range(100):
        extraJob = Job(name="TestJob%i" % jobIndex)
        extraJob.addFile(fileC)
        jobGroup.add(extraJob)

    if commitFlag:
        jobGroup.commit()

    return jobGroup
def notestCreateDeleteExists(self):
    """
    Follow WorkUnit.exists() while a job and its workflow are created and
    deleted.

    The two work units (one per input file) must not exist before the job
    is created, must exist once the job is created, must still exist after
    the job is deleted and must be gone after the workflow is deleted.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileA.addRun(Run(1, 45))
    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileB.addRun(Run(1, 46))
    fileA.create()
    fileB.create()

    job = Job(name="TestJob", files=[fileA, fileB])

    # One work unit per input file, keyed by workflow id, file id and lumi
    workUnits = (
        WorkUnit(taskID=workflow.id, fileid=fileA['id'], runLumi=Run(1, 45)),
        WorkUnit(taskID=workflow.id, fileid=fileB['id'], runLumi=Run(1, 46)),
    )

    for workUnit in workUnits:
        self.assertFalse(workUnit.exists(), "WorkUnit exists before job was created")

    job.create(group=jobGroup)
    for workUnit in workUnits:
        self.assertTrue(workUnit.exists(), "WorkUnit does not exist after job was created")

    job.delete()
    for workUnit in workUnits:
        self.assertTrue(workUnit.exists(), "WorkUnit does not exist after job is deleted")

    workflow.delete()
    for workUnit in workUnits:
        self.assertFalse(workUnit.exists(), "WorkUnit exists after workflow is deleted")

    return
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create and then delete a job with two input files.  Job.exists() must
    be false before the job is created, true after it has been created and
    false after it has been deleted.  The workflow returned by
    getWorkflow() is checked while the job exists.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileA.create()
    fileB.create()

    # NOTE: the WorkUnit existence checks that accompanied each step of this
    # test are disabled.
    job = Job(name="TestJob", files=[fileA, fileB])
    self.assertFalse(job.exists(), "Job exists before it was created")

    job.create(group=jobGroup)
    self.assertTrue(job.exists(), "Job does not exist after it was created")

    # Test the getWorkflow method
    jobWorkflow = job.getWorkflow()
    self.assertEqual(jobWorkflow['task'], 'Test')
    self.assertEqual(jobWorkflow['name'], 'wf001')

    job.delete()
    self.assertFalse(job.exists(), "Job exists after it was deleted")

    return
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site, taskType='Processing', name=None):
    """
    _createJobGroups_

    Create nSubs job groups for submission, all attached to one workflow
    named `name` (a fresh UUID when omitted).  Each job group gets its own
    fileset and "FileBased" subscription of type taskType, and is filled
    through self.makeNJobs().  Returns the list of job groups.
    """
    workflowName = makeUUID() if name is None else name

    workflow = Workflow(spec=workloadSpec, owner="tapas", name=workflowName,
                        task="basicWorkload/Production", priority=1)
    workflow.create()

    createdGroups = []
    for _ in range(nSubs):
        # Fileset and jobs of one subscription share a fresh UUID name
        subName = makeUUID()

        subFileset = Fileset(name=subName)
        subFileset.create()

        subscription = Subscription(fileset=subFileset,
                                    workflow=workflow,
                                    type=taskType,
                                    split_algo="FileBased")
        subscription.create()

        jobGroup = JobGroup(subscription=subscription)
        jobGroup.create()

        self.makeNJobs(name=subName, task=task,
                       nJobs=nJobs,
                       jobGroup=jobGroup,
                       fileset=subFileset,
                       sub=subscription.exists(),
                       site=site)

        subFileset.commit()
        jobGroup.commit()
        createdGroups.append(jobGroup)

    return createdGroups
def createDummyJobs(self, nJobs, location=None):
    """
    _createDummyJobs_

    Create nJobs dummy jobs in one new job group and return them as a
    list.  The jobs are placed at `location`, which falls back to
    self.sites[0] when no location is given.
    """
    if not location:
        location = self.sites[0]

    # One UUID names the workflow, its spec, the fileset and the jobs
    baseName = makeUUID()

    workflow = Workflow(spec=baseName,
                        owner="tapas",
                        name=baseName,
                        task="basicWorkload/Production",
                        owner_vogroup="phgroup",
                        owner_vorole="cmsrole")
    workflow.create()

    fileset = Fileset(name=baseName)
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type="Processing", split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    dummyJobs = []
    for jobIndex in range(nJobs):
        dummyJob = Job(name="%s-%i" % (baseName, jobIndex))
        dummyJob["location"] = location
        dummyJob["custom"]["location"] = location
        dummyJob["userdn"] = "tapas"
        dummyJob["owner"] = "tapas"
        dummyJob["userrole"] = "cmsrole"
        dummyJob["usergroup"] = "phgroup"

        dummyJob.create(jobGroup)
        dummyJobs.append(dummyJob)

    return dummyJobs
def load(self):
    """
    _load_

    Load the meta data of the JobGroup: id, uid, last_update time,
    subscription and output fileset.  The record is looked up by id when
    self.id is greater than zero and by uid otherwise, so one of the two
    must be set.  The subscription and output fileset are loaded as well
    and the list of jobs is reset to empty.
    """
    existingTransaction = self.beginTransaction()

    # Choose the lookup DAO and key according to what is known
    if self.id > 0:
        daoName, lookupKey = "JobGroup.LoadFromID", self.id
    else:
        daoName, lookupKey = "JobGroup.LoadFromUID", self.uid

    loader = self.daofactory(classname=daoName)
    row = loader.execute(lookupKey, conn=self.getDBConn(),
                         transaction=self.existingTransaction())

    self.id = row["id"]
    self.uid = row["uid"]
    self.lastUpdate = row["last_update"]

    self.subscription = Subscription(id=row["subscription"])
    self.subscription.load()

    self.output = Fileset(id=row["output"])
    self.output.load()

    self.jobs = []
    self.commitTransaction(existingTransaction)
    return
def setUp(self):
    """
    _setUp_

    Initialise logging, the database connection and the WMCore.WMBS
    schema, register two locations and create a single "Periodic"
    Processing subscription over the fileset "TestFileset1".  The DAO
    factory, fileset and subscription are kept on the instance.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    currentThread = threading.currentThread()
    self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                 logger=currentThread.logger,
                                 dbinterface=currentThread.dbi)

    newLocation = self.daoFactory(classname="Locations.New")
    for siteName, seName in (("site1", "somese.cern.ch"),
                             ("site2", "otherse.cern.ch")):
        newLocation.execute(siteName=siteName, seName=seName)

    self.testFileset = Fileset(name="TestFileset1")
    self.testFileset.create()

    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    self.testSubscription = Subscription(fileset=self.testFileset,
                                         workflow=workflow,
                                         split_algo="Periodic",
                                         type="Processing")
    self.testSubscription.create()
    return
def create(self):
    """
    _create_

    Add the new jobgroup to WMBS and create its output Fileset object.

    A fresh output fileset (named with a new UUID) is created first and
    replaces ``self.output``. If the jobgroup has no uid yet, one is
    generated. After the insert, ``self.id`` is set from ``self.exists()``.
    Everything happens inside one transaction obtained from
    ``beginTransaction()`` and closed with ``commitTransaction()``.
    """
    existingTransaction = self.beginTransaction()

    # Overwrite the base class self.output with a WMBS fileset.
    self.output = Fileset(name=makeUUID())
    self.output.create()

    # Identity comparison: only a missing uid triggers generation.
    if self.uid is None:
        self.uid = makeUUID()

    action = self.daofactory(classname="JobGroup.New")
    action.execute(
        self.uid,
        self.subscription["id"],
        self.output.id,
        conn=self.getDBConn(),
        transaction=self.existingTransaction(),
    )

    self.id = self.exists()
    self.commitTransaction(existingTransaction)
    return
def testAddChecksumsByLFN(self):
    """
    _testAddChecksumsByLFN_

    Attach checksums to two files with a single bulk call of the
    Files.AddChecksumByLFN DAO, then load both files back by id and check
    the checksums they report.
    """
    workflow = Workflow(spec="hello", owner="mnorman", name="wf001",
                        task="basicWorkload/Production")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type="Processing", split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Two identical files that differ only in their LFN.
    createdFiles = []
    for lfn in ("/this/is/a/lfnA", "/this/is/a/lfnB"):
        wmbsFile = File(lfn=lfn, size=1024, events=10)
        wmbsFile.addRun(Run(1, 45))
        wmbsFile.create()
        createdFiles.append(wmbsFile)
    fileA, fileB = createdFiles

    job = Job()
    job.create(group=jobGroup)
    job.associateFiles()

    checksumAction = self.daofactory(classname="Files.AddChecksumByLFN")
    checksumRows = (
        (fileA, "cksum", 101),
        (fileA, "adler32", 201),
        (fileB, "cksum", 101),
    )
    binds = [{"lfn": wmbsFile["lfn"], "cktype": ckType, "cksum": ckValue}
             for wmbsFile, ckType, ckValue in checksumRows]
    checksumAction.execute(bulkList=binds)

    reloadedA = File(id=fileA["id"])
    reloadedA.loadData()
    reloadedB = File(id=fileB["id"])
    reloadedB.loadData()

    self.assertEqual(reloadedA["checksums"],
                     {"adler32": "201", "cksum": "101"})
    self.assertEqual(reloadedB["checksums"], {"cksum": "101"})
    return
def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False):
    """
    _createSubscription_

    Create a LumiBased Processing subscription for testing.

    A new fileset (named with a UUID) receives ``nFiles`` files located at
    "somese.cern.ch"; with ``twoSites`` a second batch of ``nFiles`` files
    located at "otherse.cern.ch" is added. File number ``i`` carries run
    ``i`` with ``lumisPerFile`` lumis and has one common parent file.

    :param nFiles: number of files per site
    :param lumisPerFile: number of lumis attached to each file
    :param twoSites: also create the second batch of files
    :param rand: draw lumis with random.randint in
        [1000 * i, 1000 * (i + 1)] instead of using 100 * i + lumi index
    :returns: the created Subscription
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()

    parentFile = File('%s_parent' % (baseName), size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()

    # (LFN pattern, location) for each batch of files to create.
    batches = [('%s_%i', "somese.cern.ch")]
    if twoSites:
        batches.append(('%s_%i_2', "otherse.cern.ch"))

    for lfnPattern, location in batches:
        for i in range(nFiles):
            newFile = File(lfn=lfnPattern % (baseName, i), size=1000,
                           events=100, locations=location)
            if rand:
                lumis = [random.randint(1000 * i, 1000 * (i + 1))
                         for _ in range(lumisPerFile)]
            else:
                lumis = [(100 * i) + lumi for lumi in range(lumisPerFile)]
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)

    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()
    return testSubscription
def _checkTask(self, task, taskConf, centralConf):
    """
    _checkTask_

    Verify the correctness of one task of ``self.workload`` against its
    configuration and against what is stored in WMBS.

    Checked in order: the input step (for tasks with an "InputTask"), the
    pileup datasets, the number and names of the workflow output filesets,
    the merge workflow of every output module that has one, the primary
    and processed dataset of every non-logArchive output module, and
    finally the subscription of the task (type and splitting algorithm).

    :param task: task object to verify
    :param taskConf: dictionary with the configuration of this task
    :param centralConf: dictionary with the request-wide configuration;
        only "IgnoredOutputModules" is read from it
    """
    if taskConf.get("InputTask") is not None:
        inpTaskPath = task.getPathName()
        inpTaskPath = inpTaskPath.replace(task.name(), "")
        inpTaskPath += "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inpTaskPath,
                         "Input step is wrong in the spec")
        self.assertTrue(taskConf["InputTask"] in inpTaskPath,
                        "Input task is not in the path name for child task")

    if "MCPileup" in taskConf or "DataPileup" in taskConf:
        mcDataset = taskConf.get('MCPileup', None)
        dataDataset = taskConf.get('DataPileup', None)
        if mcDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset,
                             [mcDataset])
        if dataDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset,
                             [dataDataset])

    workflow = Workflow(name=self.workload.name(), task=task.getPathName())
    workflow.load()

    outputMods = outputModuleList(task)
    self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                     "Error: Wrong number of WF outputs")

    for outputModule in outputMods:
        filesets = workflow.outputMap[outputModule][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']

        merged.loadData()
        unmerged.loadData()

        # An output module gets a merge task unless it is the logArchive,
        # the task does not keep its output, or the module is listed as
        # transient / ignored.
        hasMergeTask = bool(
            outputModule != "logArchive"
            and taskConf.get("KeepOutput", True)
            and outputModule not in taskConf.get("TransientOutputModules", [])
            and outputModule not in centralConf.get("IgnoredOutputModules", []))

        unmergedset = task.getPathName() + "/unmerged-" + outputModule
        if hasMergeTask:
            mergedset = task.getPathName() + "/" + task.name() + "Merge" \
                + outputModule + "/merged-Merged"
        else:
            # Without a merge task the "merged" fileset is the unmerged one.
            mergedset = unmergedset

        self.assertEqual(mergedset, merged.name,
                         "Merged fileset name is wrong")
        self.assertEqual(unmergedset, unmerged.name,
                         "Unmerged fileset name is wrong")

        if hasMergeTask:
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" \
                + outputModule
            mergeWorkflow = Workflow(name=self.workload.name(),
                                     task=mergeTask)
            mergeWorkflow.load()
            self.assertTrue(
                "Merged" in mergeWorkflow.outputMap,
                "Merge workflow does not contain a Merged output key")
            mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
            mergedFileset = mergedOutputMod['merged_output_fileset']
            unmergedFileset = mergedOutputMod['output_fileset']
            mergedFileset.loadData()
            unmergedFileset.loadData()
            self.assertEqual(mergedFileset.name, mergedset,
                             "Merged fileset name in merge task is wrong")
            self.assertEqual(unmergedFileset.name, mergedset,
                             "Unmerged fileset name in merge task is wrong")

            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                'merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                'output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = task.getPathName() + "/" + task.name() + "Merge" \
                + outputModule + "/merged-logArchive"
            self.assertEqual(
                mrgLogArch.name, archName,
                "LogArchive merged fileset name is wrong in merge task")
            self.assertEqual(
                umrgLogArch.name, archName,
                "LogArchive unmerged fileset name is wrong in merge task")

        if outputModule != "logArchive":
            taskOutputMods = task.getOutputModulesForStep(stepName="cmsRun1")
            currentModule = getattr(taskOutputMods, outputModule)
            if taskConf.get("PrimaryDataset") is not None:
                self.assertEqual(currentModule.primaryDataset,
                                 taskConf["PrimaryDataset"],
                                 "Wrong primary dataset")

            # The three parts must be separate list entries; as a single
            # comma-joined string none of them was ever found in taskConf
            # and the checks below were silently skipped.
            processedDatasetParts = ["AcquisitionEra", "ProcessingString",
                                     "ProcessingVersion"]
            allParts = True
            for part in processedDatasetParts:
                if part in taskConf:
                    # Compare the configured value, not the key name.
                    self.assertTrue(
                        str(taskConf[part]) in currentModule.processedDataset,
                        "Wrong processed dataset for module")
                else:
                    allParts = False
            if allParts:
                # NOTE(review): assumes the processed dataset has no extra
                # component (e.g. a filter name) when all three parts are
                # configured on the task - confirm against the specs used.
                self.assertEqual(
                    currentModule.processedDataset,
                    "%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                   taskConf["ProcessingString"],
                                   taskConf["ProcessingVersion"]),
                    "Wrong processed dataset for module")

    # Test subscriptions
    if taskConf.get("InputTask") is None:
        inputFileset = "%s-%s-SomeBlock" % (self.workload.name(),
                                            task.name())
    elif "Merge" in task.getPathName().split("/")[-2]:
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "merged-Merged"
    else:
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "unmerged-%s" % taskConf[
            "InputFromOutputModule"]
    taskFileset = Fileset(name=inputFileset)
    taskFileset.loadData()

    taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
    taskSubscription.loadData()

    if taskConf.get("InputTask") is None and taskConf.get(
            "InputDataset") is None:
        # Production type
        self.assertEqual(
            taskSubscription["type"], "Production",
            "Error: Wrong subscription type for processing task")
        self.assertEqual(taskSubscription["split_algo"],
                         taskConf["SplittingAlgo"],
                         "Error: Wrong split algo for generation task")
    else:
        # Processing type
        self.assertEqual(taskSubscription["type"], "Processing",
                         "Wrong subscription type for task")
        if taskSubscription["split_algo"] != "WMBSMergeBySize":
            self.assertEqual(taskSubscription["split_algo"],
                             taskConf['SplittingAlgo'],
                             "Splitting algo mismatch")
        else:
            self.assertEqual(
                taskFileset.name, inpTaskPath +
                "unmerged-%s" % taskConf["InputFromOutputModule"],
                "Subscription uses WMBSMergeBySize on a merge fileset")

    return
def deleteEverything(self):
    """
    _deleteEverything_

    Delete the subscription and everything attached to it that is NOT in
    use by any other piece: its jobs, its job groups, the files and
    filesets that the check DAOs report as deletable, the workflow and
    finally the subscription itself.

    Ownership is checked through a long sequence of DAO calls, so this is
    slow. Nothing except the taskArchiver should be calling this.

    Jobs and files are deleted in batches of at most
    ``self.bulkDeleteLimit`` entries, each batch in its own transaction,
    to reduce database load. ``bulkDeleteLimit`` is expected to be a
    positive integer (a value of 0 raises ValueError from ``range``).
    """
    limit = self.bulkDeleteLimit

    existingTransaction = self.beginTransaction()

    jobGroups = self.getAllJobGroups()
    filesets = []

    # The order here is important:
    # files and filesets have to be deleted from the bottom up
    # in order to not violate parentage.

    # Get output filesets from the job groups
    loadAction = self.daofactory(classname="JobGroup.LoadFromID")
    for jobGroupID in jobGroups:
        result = loadAction.execute(jobGroupID,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        filesets.append(result['output'])

    # Get output filesets from the workflow
    for entry in self['workflow'].outputMap:
        for outputFilesets in self['workflow'].outputMap[entry]:
            wid = outputFilesets["output_fileset"].id
            if wid not in filesets:
                filesets.append(wid)

    # Do the input fileset LAST!
    filesets.append(self['fileset'].id)

    self.commitTransaction(existingTransaction)

    # First, jobs: one transaction per batch
    deleteAction = self.daofactory(classname="Jobs.Delete")
    jobDeleteList = [job['id'] for job in self.getJobs()]
    for start in range(0, len(jobDeleteList), limit):
        toDelete = jobDeleteList[start:start + limit]
        existingTransaction = self.beginTransaction()
        deleteAction.execute(id=toDelete,
                             conn=self.getDBConn(),
                             transaction=self.existingTransaction())
        self.commitTransaction(existingTransaction)

    # Next, job groups
    deleteAction = self.daofactory(classname="JobGroup.Delete")
    existingTransaction = self.beginTransaction()
    for jobGroupID in jobGroups:
        deleteAction.execute(id=jobGroupID,
                             conn=self.getDBConn(),
                             transaction=self.existingTransaction())
    self.commitTransaction(existingTransaction)

    # Now, get the filesets that need to be deleted
    action = self.daofactory(classname="Fileset.CheckForDelete")
    existingTransaction = self.beginTransaction()
    results = action.execute(fileids=filesets,
                             subid=self['id'],
                             conn=self.getDBConn(),
                             transaction=self.existingTransaction())
    self.commitTransaction(existingTransaction)
    deleteFilesets = [x['id'] for x in results]

    # Delete the files fileset by fileset,
    # each batch of files in a separate transaction
    filesInFileset = self.daofactory(classname="Files.InFileset")
    parentCheck = self.daofactory(classname="Files.DeleteParentCheck")
    fileCheck = self.daofactory(classname="Files.DeleteCheck")
    for filesetID in deleteFilesets:
        fileset = Fileset(id=filesetID)

        # Load the files
        results = filesInFileset.execute(
            fileset=filesetID,
            conn=self.getDBConn(),
            transaction=self.existingTransaction())
        filesetFiles = [result['fileid'] for result in results]

        # Nothing to clean up for a fileset without files
        if len(filesetFiles) < 1:
            continue

        # Now get rid of unused files
        for start in range(0, len(filesetFiles), limit):
            toDelete = filesetFiles[start:start + limit]
            existingTransaction = self.beginTransaction()
            parentCheck.execute(file=toDelete,
                                fileset=fileset.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())
            fileCheck.execute(file=toDelete,
                              fileset=fileset.id,
                              conn=self.getDBConn(),
                              transaction=self.existingTransaction())
            self.commitTransaction(existingTransaction)

    # Start a new transaction for filesets, workflow and the subscription
    existingTransaction = self.beginTransaction()

    # Now actually delete the filesets. The DAO result is not needed, so
    # it is deliberately not bound to a name (in particular not to the
    # list that is being iterated).
    filesetDelete = self.daofactory(classname="Fileset.DeleteCheck")
    for filesetID in deleteFilesets:
        filesetDelete.execute(fileid=filesetID,
                              subid=self['id'],
                              conn=self.getDBConn(),
                              transaction=self.existingTransaction())

    # Next, the workflow
    workflowDelete = self.daofactory(classname="Workflow.DeleteCheck")
    workflowDelete.execute(workid=self["workflow"].id,
                           subid=self["id"],
                           conn=self.getDBConn(),
                           transaction=self.existingTransaction())

    self.delete()
    self.commitTransaction(existingTransaction)
    return
def stuffWMBS(self, workflowURL, name):
    """
    _stuffWMBS_

    Insert dummy data into WMBS to test job creation: one location, two
    filesets ("mergeFileset" and "bogusFileset"), one workflow with a
    ParentlessMergeBySize subscription on the merge fileset, and eleven
    files which are added to both filesets.

    :param workflowURL: spec of the workflow that is created
    :param name: name of the workflow that is created
    """
    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute(siteName="s1", pnn="somese.cern.ch")

    mergeFileset = Fileset(name="mergeFileset")
    mergeFileset.create()
    bogusFileset = Fileset(name="bogusFileset")
    bogusFileset.create()

    mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman", name=name,
                             task="/TestWorkload/ReReco")
    mergeWorkflow.create()

    mergeSubscription = Subscription(fileset=mergeFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")
    mergeSubscription.create()
    # NOTE(review): this subscription is only constructed, never created
    # in WMBS and never used afterwards - confirm whether that is intended.
    dummySubscription = Subscription(fileset=bogusFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")

    # (lfn, size, first_event, run, lumi); every file has 1024 events.
    fileSpecs = (
        ("file1", 1024, 0, 1, 45),
        ("file2", 1024, 1024, 1, 45),
        ("file3", 1024, 2048, 1, 45),
        ("file4", 1024, 3072, 1, 45),
        ("fileA", 1024, 0, 1, 46),
        ("fileB", 1024, 1024, 1, 46),
        ("fileC", 1024, 2048, 1, 46),
        ("fileI", 1024, 0, 2, 46),
        ("fileII", 1024, 1024, 2, 46),
        ("fileIII", 1024, 2048, 2, 46),
        ("fileIV", 1024 * 1000000, 3072, 2, 46),
    )

    # Create all files first, then attach them to the filesets.
    createdFiles = []
    for lfn, size, firstEvent, runNumber, lumi in fileSpecs:
        wmbsFile = File(lfn=lfn, size=size, events=1024,
                        first_event=firstEvent,
                        locations={"somese.cern.ch"})
        wmbsFile.addRun(Run(runNumber, lumi))
        wmbsFile.create()
        createdFiles.append(wmbsFile)

    for wmbsFile in createdFiles:
        mergeFileset.addFile(wmbsFile)
        bogusFileset.addFile(wmbsFile)

    mergeFileset.commit()
    bogusFileset.commit()

    return
def testJobSerialization(self):
    """
    _testJobSerialization_

    Verify that serialization of a job works when adding a FWJR.

    One job is split off a single-file subscription, moved to 'created',
    given a framework job report loaded from Report.pkl and moved to
    'executing'. The FWJR database is then checked for the number of
    documents, the FWJRDump view rows and the content of the stored FWJR.
    """
    change = ChangeState(self.config, "changestate_t")

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute("site1", pnn="T2_CH_CERN")

    testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                            name="wf001", task=self.taskName)
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
    testFile.create()
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    # unittest assertions are used throughout: unlike bare ``assert``
    # statements they are not stripped when Python runs with -O.
    self.assertEqual(len(jobGroup.jobs), 1,
                     "Error: Splitting should have created one job.")

    testJobA = jobGroup.jobs[0]
    testJobA["user"] = "******"
    testJobA["group"] = "DMWM"
    testJobA["taskType"] = "Processing"

    change.propagate([testJobA], 'created', 'new')
    myReport = Report()
    reportPath = os.path.join(getTestBase(),
                              "WMCore_t/JobStateMachine_t/Report.pkl")
    myReport.unpersist(reportPath)
    testJobA["fwjr"] = myReport

    change.propagate([testJobA], 'executing', 'created')

    changeStateDB = self.couchServer.connectDatabase(dbname = "changestate_t/fwjrs")
    allDocs = changeStateDB.document("_all_docs")

    self.assertEqual(len(allDocs["rows"]), 2,
                     "Error: Wrong number of documents")

    result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName")
    self.assertEqual(len(result["rows"]), 1,
                     "Error: Wrong number of rows.")
    for row in result["rows"]:
        couchJobDoc = changeStateDB.document(row["value"]["id"])
        self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"],
                         "Error: Rev is wrong.")

    # Pick the first document that is not the design document. Start from
    # None so that a missing document is reported as a test failure
    # instead of a NameError.
    fwjrDoc = None
    for resultRow in allDocs["rows"]:
        if resultRow["id"] != "_design/FWJRDump":
            fwjrDoc = changeStateDB.document(resultRow["id"])
            break
    self.assertIsNotNone(fwjrDoc, "Error: FWJR document is missing.")

    self.assertEqual(fwjrDoc["retrycount"], 0,
                     "Error: Retry count is wrong.")
    self.assertEqual(len(fwjrDoc["fwjr"]["steps"].keys()), 2,
                     "Error: Wrong number of steps in FWJR.")
    self.assertIn("cmsRun1", fwjrDoc["fwjr"]["steps"].keys(),
                  "Error: cmsRun1 step is missing from FWJR.")
    self.assertIn("stageOut1", fwjrDoc["fwjr"]["steps"].keys(),
                  "Error: stageOut1 step is missing from FWJR.")

    return
def testPersist(self):
    """
    _testPersist_

    Exercise ChangeState.persist(): four jobs are split off a four-file
    subscription, two of them are persisted as 'created' (coming from
    'new') and two as 'new' (coming from 'none'); the states are then read
    back with the Jobs.GetState DAO.
    """
    change = ChangeState(self.config, "changestate_t")

    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute("site1", pnn="T2_CH_CERN")

    workflow = Workflow(spec=self.specUrl, owner="Steve",
                        name="wf001", task=self.taskName)
    workflow.create()
    fileset = Fileset(name="TestFileset")
    fileset.create()

    for index in range(4):
        inputFile = File(lfn="File%s" % index,
                         locations=set(["T2_CH_CERN"]))
        inputFile.create()
        fileset.addFile(inputFile)

    fileset.commit()
    subscription = Subscription(fileset=fileset, workflow=workflow,
                                split_algo="FileBased")
    subscription.create()

    jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                   subscription=subscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    assert len(jobGroup.jobs) == 4, \
        "Error: Splitting should have created four jobs."

    # Same user/group/task type on each of the four jobs.
    jobs = jobGroup.jobs[:4]
    for job in jobs:
        job["user"] = "******"
        job["group"] = "DMWM"
        job["taskType"] = "Processing"

    change.persist(jobs[:2], "created", "new")
    change.persist(jobs[2:], "new", "none")

    stateDAO = self.daoFactory(classname="Jobs.GetState")
    states = [stateDAO.execute(id=job["id"]) for job in jobs]

    assert states == ["created", "created", "new", "new"], \
        "Error: Jobs didn't change state correctly."

    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Install a test workload into WMBS, truncate it at
    /TestWorkload/ProcessingTask/MergeTask under the name
    "ResubmitTestWorkload" and install every top level task of the
    truncated workload as well. Then verify the merge and skim workflows,
    their output filesets and their subscriptions.
    """
    resourceControl = ResourceControl()
    for site, pnn in (("site1", "goodse.cern.ch"),
                      ("site2", "goodse2.cern.ch")):
        resourceControl.insertSite(siteName=site, pnn=pnn, ceName=site,
                                   plugin="TestPlugin")

    workload = self.createTestWMSpec()
    firstTask = getFirstTask(workload)
    helper = WMBSHelper(workload, firstTask.name(), "SomeBlock",
                        cachepath=self.workDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(firstTask, helper.topLevelFileset)

    workload.truncate("ResubmitTestWorkload",
                      "/TestWorkload/ProcessingTask/MergeTask",
                      "someserver", "somedatabase")

    # Create the subscriptions for multiple top tasks
    # (MergeTask and CleanupTask for the same block).
    for topTask in workload.getTopLevelTask():
        resubmitHelper = WMBSHelper(workload, topTask.name(), "SomeBlock2",
                                    cachepath=self.workDir)
        resubmitHelper.createTopLevelFileset()
        resubmitHelper._createSubscriptionsInWMBS(
            topTask, resubmitHelper.topLevelFileset)

    # Merge workflow
    mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                             task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    expectedMergeSpec = os.path.join(self.workDir, mergeWorkflow.name,
                                     "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(mergeWorkflow.spec, expectedMergeSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow
    skimWorkflow = Workflow(name="ResubmitTestWorkload",
                            task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    expectedSkimSpec = os.path.join(self.workDir, skimWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(skimWorkflow.spec, expectedSkimSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    skimA = skimWorkflow.outputMap["SkimOutputA"][0]
    mergedSkimA = skimA["merged_output_fileset"]
    unmergedSkimA = skimA["output_fileset"]
    skimB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedSkimB = skimB["merged_output_fileset"]
    unmergedSkimB = skimB["output_fileset"]

    mergedSkimA.loadData()
    mergedSkimB.loadData()
    unmergedSkimA.loadData()
    unmergedSkimB.loadData()

    self.assertEqual(
        mergedSkimA.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Merged output fileset is wrong: %s" % mergedSkimA.name)
    self.assertEqual(
        unmergedSkimA.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Unmerged output fileset is wrong.")
    self.assertEqual(
        mergedSkimB.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Merged output fileset is wrong.")
    self.assertEqual(
        unmergedSkimB.name,
        "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Unmerged output fileset is wrong.")

    # Subscriptions
    topLevelFileset = Fileset(
        name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: install a
    test workload into WMBS and check the processing, merge, cleanup and
    skim workflows, their output filesets, and the processing, merge and
    skim subscriptions (type, splitting algorithm, site white/black list).
    """
    resourceControl = ResourceControl()
    for site, pnn in (("site1", "goodse.cern.ch"),
                      ("site2", "goodse2.cern.ch")):
        resourceControl.insertSite(siteName=site, pnn=pnn, ceName=site,
                                   plugin="TestPlugin")

    workload = self.createTestWMSpec()
    firstTask = getFirstTask(workload)
    helper = WMBSHelper(workload, firstTask.name(), "SomeBlock",
                        cachepath=self.workDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(firstTask, helper.topLevelFileset)

    # Processing workflow
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask")
    procWorkflow.load()

    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco",
                     "Error: Wrong type.")
    expectedProcSpec = os.path.join(self.workDir, procWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(procWorkflow.spec, expectedProcSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    procOutputA = procWorkflow.outputMap["OutputA"][0]
    mergedProcOutput = procOutputA["merged_output_fileset"]
    unmergedProcOutput = procOutputA["output_fileset"]

    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()

    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    expectedMergeSpec = os.path.join(self.workDir, mergeWorkflow.name,
                                     "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(mergeWorkflow.spec, expectedMergeSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow
    cleanupWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()

    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    expectedCleanupSpec = os.path.join(self.workDir, cleanupWorkflow.name,
                                       "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(cleanupWorkflow.spec, expectedCleanupSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow
    skimWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    expectedSkimSpec = os.path.join(self.workDir, skimWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(skimWorkflow.spec, expectedSkimSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    skimA = skimWorkflow.outputMap["SkimOutputA"][0]
    mergedSkimA = skimA["merged_output_fileset"]
    unmergedSkimA = skimA["output_fileset"]
    skimB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedSkimB = skimB["merged_output_fileset"]
    unmergedSkimB = skimB["output_fileset"]

    mergedSkimA.loadData()
    mergedSkimB.loadData()
    unmergedSkimA.loadData()
    unmergedSkimB.loadData()

    self.assertEqual(
        mergedSkimA.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Merged output fileset is wrong: %s" % mergedSkimA.name)
    self.assertEqual(
        unmergedSkimA.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
        "Error: Unmerged output fileset is wrong.")
    self.assertEqual(
        mergedSkimB.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Merged output fileset is wrong.")
    self.assertEqual(
        unmergedSkimB.name,
        "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
        "Error: Unmerged output fileset is wrong.")

    # Processing subscription
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for siteEntry in procSubscription.getWhiteBlackList():
        # Only site1 is expected to be valid.
        if siteEntry["site_name"] == "site1":
            self.assertEqual(siteEntry["valid"], 1,
                             "Error: Site should be white listed.")
        else:
            self.assertEqual(siteEntry["valid"], 0,
                             "Error: Site should be black listed.")

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscription
    mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # Skim subscription
    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
def setUp(self):
    """
    _setUp_

    Prepare a fresh database for the Condition splitter test.

    Installs the DBS3Buffer and T0 WMBS schemas and then seeds:

    - one wmbs_location row, one wmbs_pnns row and the mapping between them
    - run 1 with lumi section 1, the "Express" stream and its stream
      fileset "TestFileset1"
    - one streamer file ("/streamer") and one prompt calibration entry
      (inserted through RunConfig.InsertPromptCalibration)
    - a "Condition" subscription of TestWorkflow1 on TestFileset1
    - the files /alcareco, /alcaprompt and /sqlite, where only /sqlite is
      added to the fileset, linked by the parentage chain
      /streamer -> /alcareco -> /alcaprompt -> /sqlite

    The DAOs needed later by the test and the default splitter arguments
    are stored on the instance.
    """
    import WMQuality.TestInit
    WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    # current_thread() is the supported spelling; currentThread() is a
    # deprecated alias of the same function.
    myThread = threading.current_thread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

    # One location, one PNN, and the row that maps the PNN to the location.
    seedStatements = (
        """INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """,
        """INSERT INTO wmbs_pnns (id, pnn) VALUES (1, 'SomePNN') """,
        """INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 1) """,
    )
    for statement in seedStatements:
        myThread.dbi.processData(statement, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertRunDAO.execute(binds={'RUN': 1, 'HLTKEY': "someHLTKey"},
                         transaction=False)

    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

    insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
    insertStreamerDAO.execute(streamerPNN="SomePNN",
                              binds={'RUN': 1,
                                     'P5_ID': 1,
                                     'LUMI': 1,
                                     'STREAM': "Express",
                                     'TIME': int(time.time()),
                                     'LFN': "/streamer",
                                     'FILESIZE': 0,
                                     'EVENTS': 0},
                              transaction=False)

    insertPromptCalibrationDAO = daoFactory(classname="RunConfig.InsertPromptCalibration")
    insertPromptCalibrationDAO.execute({'RUN': 1,
                                        'STREAM': "Express",
                                        'NUM_PRODUCER': 1},
                                       transaction=False)

    # kept for use by the test methods
    self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")
    self.markPromptCalibrationFinishedDAO = daoFactory(
        classname="ConditionUpload.MarkPromptCalibrationFinished")

    self.fileset1 = Fileset(name="TestFileset1")
    self.fileset1.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Condition",
                                      type="Condition")
    self.subscription1.create()

    # set parentage chain and sqlite fileset
    for lfn in ("/alcareco", "/alcaprompt"):
        intermediateFile = File(lfn, size=0, events=0)
        intermediateFile.addRun(Run(1, 1))
        intermediateFile.setLocation("SomePNN", immediateSave=False)
        intermediateFile.create()

    # Only the sqlite file becomes part of the subscribed fileset.
    sqliteFile = File("/sqlite", size=0, events=0)
    sqliteFile.create()
    self.fileset1.addFile(sqliteFile)
    self.fileset1.commit()

    setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage")
    setParentageDAO.execute(binds=[{'parent': "/streamer", 'child': "/alcareco"},
                                   {'parent': "/alcareco", 'child': "/alcaprompt"},
                                   {'parent': "/alcaprompt", 'child': "/sqlite"}],
                            transaction=False)

    # default split parameters
    self.splitArgs = {'runNumber': 1, 'streamName': "Express"}
class ConditionTest(unittest.TestCase):
    """
    _ConditionTest_

    Test for the Condition job splitter (T0.JobSplitting, split_algo
    "Condition").
    """

    def setUp(self):
        """
        _setUp_

        Prepare a fresh database for the Condition splitter test.

        Installs the DBS3Buffer and T0 WMBS schemas and then seeds:

        - one wmbs_location row, one wmbs_pnns row and the mapping between
          them
        - run 1 with lumi section 1, the "Express" stream and its stream
          fileset "TestFileset1"
        - one streamer file ("/streamer") and one prompt calibration entry
          (inserted through RunConfig.InsertPromptCalibration)
        - a "Condition" subscription of TestWorkflow1 on TestFileset1
        - the files /alcareco, /alcaprompt and /sqlite, where only /sqlite
          is added to the fileset, linked by the parentage chain
          /streamer -> /alcareco -> /alcaprompt -> /sqlite

        The DAOs needed by test00 and the default splitter arguments are
        stored on the instance.
        """
        import WMQuality.TestInit
        WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        # current_thread() is the supported spelling; currentThread() is a
        # deprecated alias of the same function.
        myThread = threading.current_thread()

        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        # One location, one PNN, and the row that maps the PNN to the
        # location.
        seedStatements = (
            """INSERT INTO wmbs_location (id, site_name, state, state_time) VALUES (1, 'SomeSite', 1, 1) """,
            """INSERT INTO wmbs_pnns (id, pnn) VALUES (1, 'SomePNN') """,
            """INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 1) """,
        )
        for statement in seedStatements:
            myThread.dbi.processData(statement, transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={'RUN': 1, 'HLTKEY': "someHLTKey"},
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN="SomePNN",
                                  binds={'RUN': 1,
                                         'P5_ID': 1,
                                         'LUMI': 1,
                                         'STREAM': "Express",
                                         'TIME': int(time.time()),
                                         'LFN': "/streamer",
                                         'FILESIZE': 0,
                                         'EVENTS': 0},
                                  transaction=False)

        insertPromptCalibrationDAO = daoFactory(classname="RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute({'RUN': 1,
                                            'STREAM': "Express",
                                            'NUM_PRODUCER': 1},
                                           transaction=False)

        # kept for use by test00
        self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")
        self.markPromptCalibrationFinishedDAO = daoFactory(
            classname="ConditionUpload.MarkPromptCalibrationFinished")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                             name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Condition",
                                          type="Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        for lfn in ("/alcareco", "/alcaprompt"):
            intermediateFile = File(lfn, size=0, events=0)
            intermediateFile.addRun(Run(1, 1))
            intermediateFile.setLocation("SomePNN", immediateSave=False)
            intermediateFile.create()

        # Only the sqlite file becomes part of the subscribed fileset.
        sqliteFile = File("/sqlite", size=0, events=0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage")
        setParentageDAO.execute(binds=[{'parent': "/streamer", 'child': "/alcareco"},
                                       {'parent': "/alcareco", 'child': "/alcaprompt"},
                                       {'parent': "/alcaprompt", 'child': "/sqlite"}],
                                transaction=False)

        # default split parameters
        self.splitArgs = {'runNumber': 1, 'streamName': "Express"}

    def tearDown(self):
        """
        _tearDown_

        Clear the database populated by setUp.
        """
        self.testInit.clearDatabase()

    def _selectScalar(self, sql):
        """
        __selectScalar_

        Execute *sql* with transaction=False and return the first column of
        the first row of the first result set.
        """
        myThread = threading.current_thread()
        return myThread.dbi.processData(sql, transaction=False)[0].fetchall()[0][0]

    def isPromptCalibFinished(self):
        """
        _isPromptCalibFinished_

        Return the "finished" column of the first prompt_calib row.
        """
        return self._selectScalar("""SELECT finished FROM prompt_calib """)

    def countPromptCalibFiles(self):
        """
        _countPromptCalibFiles_

        Return the number of rows in prompt_calib_file.
        """
        return self._selectScalar("""SELECT COUNT(*) FROM prompt_calib_file """)

    def test00(self):
        """
        _test00_

        Make sure the job splitter behaves correctly.

        Before the splitter runs, prompt_calib is not finished and
        prompt_calib_file is empty. The first splitter pass must leave
        prompt_calib unfinished while registering exactly one
        prompt_calib_file.

        The input file is then completed, the prompt calibration is marked
        finished and the fileset is closed. The second splitter pass must
        return no job group; prompt_calib must then report finished and
        prompt_calib_file must still hold exactly one row.
        """
        mySplitArgs = self.splitArgs.copy()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")
        self.assertEqual(self.countPromptCalibFiles(), 0,
                         "ERROR: there should be no prompt_calib_file")

        # First pass: only the database side effects are checked.
        jobFactory(**mySplitArgs)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")
        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")

        # NOTE(review): presumably (subscription id 1, file id 4), /sqlite
        # being the fourth file registered in setUp - confirm against the
        # CompleteFiles DAO signature.
        self.completeFilesDAO.execute(1, 4, transaction=False)
        # NOTE(review): presumably (run 1, stream id 1) - confirm against
        # the MarkPromptCalibrationFinished DAO signature.
        self.markPromptCalibrationFinishedDAO.execute(1, 1, transaction=False)

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")
        self.assertEqual(self.isPromptCalibFinished(), 1,
                         "ERROR: prompt_calib should be finished")
        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")
def setUp(self):
    """
    _setUp_

    Build the database fixture for the Repack / RepackMerge splitter test.

    Installs the T0 WMBS schema and seeds one location with two PNN
    mappings, run 1 with lumi sections 1 to 5, stream "A" bound to
    CMSSW_4_2_7, one streamer file in lumi 4 and the stream fileset
    "TestFileset1".

    Two subscriptions are created: a "Repack" subscription of
    TestWorkflow1 on TestFileset1 and a "RepackMerge" subscription of
    TestWorkflow2 on TestFileset2. TestFileset2 is registered in
    wmbs_workflow_output as output 'SOMEOUTPUT' of TestWorkflow1.

    The DAOs needed by the tests, the current time and the default
    splitter arguments are stored on the instance.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    # current_thread() is the supported spelling; currentThread() is a
    # deprecated alias of the same function.
    myThread = threading.current_thread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

    # One location that is reachable through two PNNs.
    seedStatements = (
        """INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """,
        """INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN') """,
        """INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN2') """,
    )
    for statement in seedStatements:
        myThread.dbi.processData(statement, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertRunDAO.execute(binds={'RUN': 1, 'HLTKEY': "someHLTKey"},
                         transaction=False)

    # lumi sections 1..5 of run 1
    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    for lumi in range(1, 6):
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': lumi}, transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

    insertCMSSVersionDAO = daoFactory(classname="RunConfig.InsertCMSSWVersion")
    insertCMSSVersionDAO.execute(binds={'VERSION': "CMSSW_4_2_7"},
                                 transaction=False)

    insertStreamCMSSWVersionDAO = daoFactory(classname="RunConfig.InsertStreamCMSSWVersion")
    insertStreamCMSSWVersionDAO.execute(binds={'RUN': 1,
                                               'STREAM': 'A',
                                               'VERSION': "CMSSW_4_2_7"},
                                        transaction=False)

    insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
    insertStreamerDAO.execute(binds={'RUN': 1,
                                     'LUMI': 4,
                                     'STREAM': "A",
                                     'LFN': "/testLFN/A",
                                     'FILESIZE': 100,
                                     'EVENTS': 100,
                                     'TIME': int(time.time())},
                              transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

    # TestFileset1 is loaded rather than created; it is the fileset name
    # handed to InsertStreamFileset just above.
    self.fileset1 = Fileset(name="TestFileset1")
    self.fileset2 = Fileset(name="TestFileset2")
    self.fileset1.load()
    self.fileset2.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow2 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow2", task="Test")
    workflow1.create()
    workflow2.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Repack",
                                      type="Repack")
    self.subscription2 = Subscription(fileset=self.fileset2,
                                      workflow=workflow2,
                                      split_algo="RepackMerge",
                                      type="RepackMerge")
    self.subscription1.create()
    self.subscription2.create()

    # Register fileset2 as an output of workflow1. Both ids are formatted
    # with %d, so only integers can reach the statement text.
    myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id),
                             transaction=False)

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")
    self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
    self.feedStreamersDAO = daoFactory(classname="Tier0Feeder.FeedStreamers")
    self.acquireFilesDAO = wmbsDaoFactory(classname="Subscriptions.AcquireFiles")
    self.completeFilesDAO = wmbsDaoFactory(classname="Subscriptions.CompleteFiles")
    self.currentTime = int(time.time())

    # default split parameters
    self.splitArgs = {
        'minInputSize': 2.1 * 1024 * 1024 * 1024,
        'maxInputSize': 4.0 * 1024 * 1024 * 1024,
        'maxInputEvents': 100000000,
        'maxInputFiles': 1000,
        'maxEdmSize': 20 * 1024 * 1024 * 1024,
        'maxOverSize': 10 * 1024 * 1024 * 1024,
    }
def stuffWMBS(self, injected=True):
    """
    _stuffWMBS_

    Fill WMBS with the fixture used by the merge-by-size tests.

    Two sites (T2_CH_CERN and T1_US_FNAL) are registered against the same
    PNN. A merge workflow is created and marked with the given *injected*
    flag, and subscribed to the "mergeFileset" fileset. An input workflow
    with two job groups, plus a bogus input workflow with one job group,
    provide eight jobs in the "cleanout" state with a mix of "success" and
    "failure" outcomes. Fifteen output files, each parented to one of the
    job input files, are added to the job group output filesets and to
    both the merge fileset and the bogus fileset.
    """
    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
    newLocation.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

    stateChanger = self.daoFactory(classname="Jobs.ChangeState")

    # Each fileset is stored on the instance under its own WMBS name.
    for filesetName in ("mergeFileset", "bogusFileset",
                        "mergeMergedFileset", "bogusMergedFileset"):
        setattr(self, filesetName, Fileset(name=filesetName))
        getattr(self, filesetName).create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                             owner="Steve", task="Test")
    mergeWorkflow.create()
    injector = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
    injector.execute(names=[mergeWorkflow.name], injected=injected)

    self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
    self.mergeSubscription.create()
    # The bogus subscription is only instantiated, never written to WMBS.
    self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                             owner="Steve", task="Test")
    inputWorkflow.create()
    inputWorkflow.addOutput("output", self.mergeFileset,
                            self.mergeMergedFileset)
    inputWorkflow.addOutput("output2", self.bogusFileset,
                            self.bogusMergedFileset)

    bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                  owner="Steve", task="Test")
    bogusInputWorkflow.create()

    inputSubscription = Subscription(fileset=inputFileset,
                                     workflow=inputWorkflow)
    inputSubscription.create()
    bogusInputSubscription = Subscription(fileset=inputFileset,
                                          workflow=bogusInputWorkflow)
    bogusInputSubscription.create()

    def storedParent(lfn):
        """Create a job input file with the given LFN and return it."""
        parent = File(lfn=lfn)
        parent.create()
        return parent

    def storedJobGroup(subscription):
        """Create a job group on the given subscription and return it."""
        group = JobGroup(subscription=subscription)
        group.create()
        return group

    def finishedJob(parent, group, outcome):
        """Create a job on *parent* in *group*, in cleanout state with *outcome*."""
        job = Job()
        job.addFile(parent)
        job.create(group)
        for key, value in (("state", "cleanout"),
                           ("oldstate", "new"),
                           ("couch_record", "somejive"),
                           ("retry_count", 0),
                           ("outcome", outcome)):
            job[key] = value
        job.save()
        stateChanger.execute([job])

    def mergeableFile(lfn, firstEvent, run, lumi, parent, size=1024, events=1024):
        """Create an output file at T2_CH_CERN in (run, lumi), child of *parent*."""
        fileObj = File(lfn=lfn, size=size, events=events,
                       first_event=firstEvent, locations={"T2_CH_CERN"})
        fileObj.addRun(Run(run, lumi))
        fileObj.create()
        fileObj.addParent(parent["lfn"])
        return fileObj

    parent1, parent2, parent3, parent4 = [
        storedParent("parentFile%d" % index) for index in range(1, 5)
    ]
    self.parentFileSite2 = File(lfn="parentFileSite2")
    self.parentFileSite2.create()

    jobGroup1 = storedJobGroup(inputSubscription)
    jobGroup2 = storedJobGroup(inputSubscription)
    jobGroup3 = storedJobGroup(bogusInputSubscription)

    finishedJob(parent1, jobGroup1, "success")
    finishedJob(parent1, jobGroup3, "failure")
    finishedJob(parent2, jobGroup1, "success")
    finishedJob(parent3, jobGroup2, "success")
    finishedJob(parent4, jobGroup2, "failure")

    # We'll simulate a failed split by event job that the merger should
    # ignore.
    parent5 = storedParent("parentFile5")
    finishedJob(parent5, jobGroup2, "success")
    finishedJob(parent5, jobGroup2, "failure")

    finishedJob(self.parentFileSite2, jobGroup2, "success")

    firstEvents = (0, 1024, 2048, 3072)

    badFile1 = mergeableFile("badFile1", 0, 1, 45, parent5,
                             size=10241024, events=10241024)
    # zip() stops at the shorter sequence, so three-file groups use the
    # first three event offsets only.
    run1Lumi45 = [mergeableFile(lfn, firstEvent, 1, 45, parent1)
                  for lfn, firstEvent
                  in zip(("file1", "file2", "file3", "file4"), firstEvents)]
    run1Lumi46 = [mergeableFile(lfn, firstEvent, 1, 46, parent2)
                  for lfn, firstEvent
                  in zip(("fileA", "fileB", "fileC"), firstEvents)]
    run2Lumi46 = [mergeableFile(lfn, firstEvent, 2, 46, parent3)
                  for lfn, firstEvent
                  in zip(("fileI", "fileII", "fileIII", "fileIV"), firstEvents)]
    run1Lumi47 = [mergeableFile(lfn, firstEvent, 1, 47, parent4)
                  for lfn, firstEvent
                  in zip(("badFileA", "badFileB", "badFileC"), firstEvents)]

    for fileObj in run1Lumi45 + run1Lumi46:
        jobGroup1.output.addFile(fileObj)
    jobGroup1.output.commit()

    for fileObj in run2Lumi46 + run1Lumi47 + [badFile1]:
        jobGroup2.output.addFile(fileObj)
    jobGroup2.output.commit()

    everyFile = run1Lumi45 + run1Lumi46 + run2Lumi46 + run1Lumi47 + [badFile1]
    for fileObj in everyFile:
        self.mergeFileset.addFile(fileObj)
        self.bogusFileset.addFile(fileObj)

    self.mergeFileset.commit()
    self.bogusFileset.commit()

    return
class WMBSMergeBySize(unittest.TestCase): def setUp(self): """ _setUp_ Boiler plate DB setup. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) return def tearDown(self): """ _tearDown_ Clear out WMBS. """ self.testInit.clearDatabase() return def stuffWMBS(self, injected=True): """ _stuffWMBS_ Insert some dummy jobs, jobgroups, filesets, files and subscriptions into WMBS to test job creation. Three completed job groups each containing several files are injected. Another incomplete job group is also injected. Also files are added to the "Mergeable" subscription as well as to the output fileset for their jobgroups. """ locationAction = self.daoFactory(classname="Locations.New") locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN") locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN") changeStateDAO = self.daoFactory(classname="Jobs.ChangeState") self.mergeFileset = Fileset(name="mergeFileset") self.mergeFileset.create() self.bogusFileset = Fileset(name="bogusFileset") self.bogusFileset.create() self.mergeMergedFileset = Fileset(name="mergeMergedFileset") self.mergeMergedFileset.create() self.bogusMergedFileset = Fileset(name="bogusMergedFileset") self.bogusMergedFileset.create() mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2", owner="Steve", task="Test") mergeWorkflow.create() markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows") markWorkflow.execute(names=[mergeWorkflow.name], injected=injected) self.mergeSubscription = Subscription(fileset=self.mergeFileset, workflow=mergeWorkflow, split_algo="WMBSMergeBySize") self.mergeSubscription.create() self.bogusSubscription = Subscription(fileset=self.bogusFileset, workflow=mergeWorkflow, 
split_algo="WMBSMergeBySize") inputFileset = Fileset(name="inputFileset") inputFileset.create() inputWorkflow = Workflow(name="inputWorkflow", spec="input", owner="Steve", task="Test") inputWorkflow.create() inputWorkflow.addOutput("output", self.mergeFileset, self.mergeMergedFileset) inputWorkflow.addOutput("output2", self.bogusFileset, self.bogusMergedFileset) bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input", owner="Steve", task="Test") bogusInputWorkflow.create() inputSubscription = Subscription(fileset=inputFileset, workflow=inputWorkflow) inputSubscription.create() bogusInputSubscription = Subscription(fileset=inputFileset, workflow=bogusInputWorkflow) bogusInputSubscription.create() parentFile1 = File(lfn="parentFile1") parentFile1.create() parentFile2 = File(lfn="parentFile2") parentFile2.create() parentFile3 = File(lfn="parentFile3") parentFile3.create() parentFile4 = File(lfn="parentFile4") parentFile4.create() self.parentFileSite2 = File(lfn="parentFileSite2") self.parentFileSite2.create() jobGroup1 = JobGroup(subscription=inputSubscription) jobGroup1.create() jobGroup2 = JobGroup(subscription=inputSubscription) jobGroup2.create() jobGroup3 = JobGroup(subscription=bogusInputSubscription) jobGroup3.create() testJob1 = Job() testJob1.addFile(parentFile1) testJob1.create(jobGroup1) testJob1["state"] = "cleanout" testJob1["oldstate"] = "new" testJob1["couch_record"] = "somejive" testJob1["retry_count"] = 0 testJob1["outcome"] = "success" testJob1.save() changeStateDAO.execute([testJob1]) testJob1A = Job() testJob1A.addFile(parentFile1) testJob1A.create(jobGroup3) testJob1A["state"] = "cleanout" testJob1A["oldstate"] = "new" testJob1A["couch_record"] = "somejive" testJob1A["retry_count"] = 0 testJob1A["outcome"] = "failure" testJob1A.save() changeStateDAO.execute([testJob1A]) testJob2 = Job() testJob2.addFile(parentFile2) testJob2.create(jobGroup1) testJob2["state"] = "cleanout" testJob2["oldstate"] = "new" testJob2["couch_record"] = 
"somejive" testJob2["retry_count"] = 0 testJob2["outcome"] = "success" testJob2.save() changeStateDAO.execute([testJob2]) testJob3 = Job() testJob3.addFile(parentFile3) testJob3.create(jobGroup2) testJob3["state"] = "cleanout" testJob3["oldstate"] = "new" testJob3["couch_record"] = "somejive" testJob3["retry_count"] = 0 testJob3["outcome"] = "success" testJob3.save() changeStateDAO.execute([testJob3]) testJob4 = Job() testJob4.addFile(parentFile4) testJob4.create(jobGroup2) testJob4["state"] = "cleanout" testJob4["oldstate"] = "new" testJob4["couch_record"] = "somejive" testJob4["retry_count"] = 0 testJob4["outcome"] = "failure" testJob4.save() changeStateDAO.execute([testJob4]) # We'll simulate a failed split by event job that the merger should # ignore. parentFile5 = File(lfn="parentFile5") parentFile5.create() testJob5 = Job() testJob5.addFile(parentFile5) testJob5.create(jobGroup2) testJob5["state"] = "cleanout" testJob5["oldstate"] = "new" testJob5["couch_record"] = "somejive" testJob5["retry_count"] = 0 testJob5["outcome"] = "success" testJob5.save() changeStateDAO.execute([testJob5]) testJob6 = Job() testJob6.addFile(parentFile5) testJob6.create(jobGroup2) testJob6["state"] = "cleanout" testJob6["oldstate"] = "new" testJob6["couch_record"] = "somejive" testJob6["retry_count"] = 0 testJob6["outcome"] = "failure" testJob6.save() changeStateDAO.execute([testJob6]) testJob7 = Job() testJob7.addFile(self.parentFileSite2) testJob7.create(jobGroup2) testJob7["state"] = "cleanout" testJob7["oldstate"] = "new" testJob7["couch_record"] = "somejive" testJob7["retry_count"] = 0 testJob7["outcome"] = "success" testJob7.save() changeStateDAO.execute([testJob7]) badFile1 = File(lfn="badFile1", size=10241024, events=10241024, first_event=0, locations={"T2_CH_CERN"}) badFile1.addRun(Run(1, *[45])) badFile1.create() badFile1.addParent(parentFile5["lfn"]) file1 = File(lfn="file1", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) file1.addRun(Run(1, *[45])) 
file1.create() file1.addParent(parentFile1["lfn"]) file2 = File(lfn="file2", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) file2.addRun(Run(1, *[45])) file2.create() file2.addParent(parentFile1["lfn"]) file3 = File(lfn="file3", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) file3.addRun(Run(1, *[45])) file3.create() file3.addParent(parentFile1["lfn"]) file4 = File(lfn="file4", size=1024, events=1024, first_event=3072, locations={"T2_CH_CERN"}) file4.addRun(Run(1, *[45])) file4.create() file4.addParent(parentFile1["lfn"]) fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileA.addRun(Run(1, *[46])) fileA.create() fileA.addParent(parentFile2["lfn"]) fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileB.addRun(Run(1, *[46])) fileB.create() fileB.addParent(parentFile2["lfn"]) fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileC.addRun(Run(1, *[46])) fileC.create() fileC.addParent(parentFile2["lfn"]) fileI = File(lfn="fileI", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileI.addRun(Run(2, *[46])) fileI.create() fileI.addParent(parentFile3["lfn"]) fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileII.addRun(Run(2, *[46])) fileII.create() fileII.addParent(parentFile3["lfn"]) fileIII = File(lfn="fileIII", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileIII.addRun(Run(2, *[46])) fileIII.create() fileIII.addParent(parentFile3["lfn"]) fileIV = File(lfn="fileIV", size=1024, events=1024, first_event=3072, locations={"T2_CH_CERN"}) fileIV.addRun(Run(2, *[46])) fileIV.create() fileIV.addParent(parentFile3["lfn"]) fileX = File(lfn="badFileA", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileX.addRun(Run(1, *[47])) fileX.create() fileX.addParent(parentFile4["lfn"]) fileY = File(lfn="badFileB", 
size=1024, events=1024, first_event=1024, locations={"T2_CH_CERN"}) fileY.addRun(Run(1, *[47])) fileY.create() fileY.addParent(parentFile4["lfn"]) fileZ = File(lfn="badFileC", size=1024, events=1024, first_event=2048, locations={"T2_CH_CERN"}) fileZ.addRun(Run(1, *[47])) fileZ.create() fileZ.addParent(parentFile4["lfn"]) jobGroup1.output.addFile(file1) jobGroup1.output.addFile(file2) jobGroup1.output.addFile(file3) jobGroup1.output.addFile(file4) jobGroup1.output.addFile(fileA) jobGroup1.output.addFile(fileB) jobGroup1.output.addFile(fileC) jobGroup1.output.commit() jobGroup2.output.addFile(fileI) jobGroup2.output.addFile(fileII) jobGroup2.output.addFile(fileIII) jobGroup2.output.addFile(fileIV) jobGroup2.output.addFile(fileX) jobGroup2.output.addFile(fileY) jobGroup2.output.addFile(fileZ) jobGroup2.output.addFile(badFile1) jobGroup2.output.commit() for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]: self.mergeFileset.addFile(fileObj) self.bogusFileset.addFile(fileObj) self.mergeFileset.commit() self.bogusFileset.commit() return def testMinMergeSize1(self): """ _testMinMergeSize1_ Set the minimum merge size to be 20,000 bytes which is more than the sum of all file sizes in the WMBS instance. Verify that no merge jobs will be produced. """ self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=20000, max_merge_size=2000000000, max_merge_events=200000000) assert len(result) == 0, \ "ERROR: No job groups should be returned." return def testMinMergeSize1a(self): """ _testMinMergeSize1a_ Set the minimum merge size to be 20,000 bytes which is more than the sum of all file sizes in the WMBS instance and mark the fileset as closed. Verify that one job containing all files is pushed out. 
""" self.stuffWMBS() self.mergeFileset.markOpen(False) splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=20000, max_merge_size=200000, max_merge_events=20000) assert len(result) == 1, \ "ERROR: More than one JobGroup returned: %s" % len(result) assert len(result[0].jobs) == 2, \ "Error: Two jobs should have been returned." goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"] goldenFilesB = ["fileI", "fileII", "fileIII", "fileIV"] for job in result[0].jobs: self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) jobFiles = job.getFiles() if len(jobFiles) == len(goldenFilesA): self.assertEqual(job["estimatedDiskUsage"], 7) goldenFiles = goldenFilesA else: self.assertEqual(job["estimatedDiskUsage"], 4) goldenFiles = goldenFilesB currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: fileObj.loadData() assert fileObj["lfn"] in goldenFiles, \ "Error: Unknown file: %s" % fileObj["lfn"] goldenFiles.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run." if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent return def testMinMergeSize2(self): """ _testMinMergeSize2_ Set the minimum merge size to be 7,167 bytes which is one byte less than the sum of all the file sizes in the largest merge group in the WMBS instance. Verify that one merge job containing all the files in the largest merge group is produced. 
""" self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=7167, max_merge_size=20000, max_merge_events=20000) assert len(result) == 1, \ "ERROR: More than one JobGroup returned: %d" % len(result) assert len(result[0].jobs) == 1, \ "ERROR: One job should have been returned." self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7) self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) jobFiles = list(result[0].jobs)[0].getFiles() goldenFiles = ["file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"] assert len(jobFiles) == len(goldenFiles), \ "ERROR: Merge job should contain %d files." % len(goldenFiles) currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: assert fileObj["lfn"] in goldenFiles, \ "Error: Unknown file: %s" % fileObj["lfn"] goldenFiles.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run." if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent return def testMaxMergeSize1(self): """ _testMaxMergeSize1_ Set the maximum merge size to be two bytes. Verify that three merge jobs are created, one for each job group that exists inside the WMBS instance. Verify that each merge job contains the expected files. 
""" self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=1, max_merge_size=2, max_merge_events=20000) assert len(result) == 1, \ "ERROR: More than one JobGroup returned: %s" % result assert len(result[0].jobs) == 3, \ "ERROR: Three jobs should have been returned." self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) goldenFilesA = ["file1", "file2", "file3", "file4"] goldenFilesB = ["fileA", "fileB", "fileC"] goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"] for job in result[0].jobs: jobFiles = job.getFiles() if jobFiles[0]["lfn"] in goldenFilesA: self.assertEqual(job["estimatedDiskUsage"], 4) goldenFiles = goldenFilesA elif jobFiles[0]["lfn"] in goldenFilesB: self.assertEqual(job["estimatedDiskUsage"], 3) goldenFiles = goldenFilesB else: self.assertEqual(job["estimatedDiskUsage"], 4) goldenFiles = goldenFilesC currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: assert fileObj["lfn"] in goldenFiles, \ "Error: Unknown file in merge jobs." goldenFiles.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run." if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \ len(goldenFilesC) == 0, \ "ERROR: Files missing from merge jobs." return def testMaxMergeSize2(self): """ _testMaxMergeSize2_ Set the minimum merge size to be one byte larger than the largest job group in the WMBS instance and the max merge size to be one byte larger than the total size of two of the groups. 
Verify that one merge job is produced with two of the job groups in it. """ self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=4097, max_merge_size=7169, max_merge_events=20000) assert len(result) == 1, \ "ERROR: More than one JobGroup returned." assert len(result[0].jobs) == 1, \ "ERROR: One job should have been returned." goldenFilesA = ["file1", "file2", "file3", "file4"] goldenFilesB = ["fileA", "fileB", "fileC"] goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"] self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7) self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) jobFiles = list(result[0].jobs)[0].getFiles() currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: if fileObj["lfn"] in goldenFilesA: goldenFilesA.remove(fileObj["lfn"]) elif fileObj["lfn"] in goldenFilesB: goldenFilesB.remove(fileObj["lfn"]) elif fileObj["lfn"] in goldenFilesC: goldenFilesC.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run." if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent assert len(goldenFilesB) == 0 and \ (len(goldenFilesA) == 0 or len(goldenFilesC) == 0), \ "ERROR: Files not allocated to jobs correctly." return def testMaxEvents1(self): """ _testMaxEvents1_ Set the maximum number of events per merge job to 1. 
""" self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=1, max_merge_size=20000, max_merge_events=1) assert len(result) == 1, \ "ERROR: More than one JobGroup returned: %s" % result assert len(result[0].jobs) == 3, \ "ERROR: Three jobs should have been returned: %s" % len(result[0].jobs) goldenFilesA = ["file1", "file2", "file3", "file4"] goldenFilesB = ["fileA", "fileB", "fileC"] goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"] for job in result[0].jobs: self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) jobFiles = job.getFiles() if jobFiles[0]["lfn"] in goldenFilesA: self.assertEqual(job["estimatedDiskUsage"], 4) goldenFiles = goldenFilesA elif jobFiles[0]["lfn"] in goldenFilesB: self.assertEqual(job["estimatedDiskUsage"], 3) goldenFiles = goldenFilesB else: self.assertEqual(job["estimatedDiskUsage"], 4) goldenFiles = goldenFilesC currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: assert fileObj["lfn"] in goldenFiles, \ "Error: Unknown file in merge jobs." goldenFiles.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run: %s, %s" % (fileRun, currentRun) if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \ len(goldenFilesC) == 0, \ "ERROR: Files missing from merge jobs." 
return def testMaxEvents2(self): """ _testMaxEvents2_ Set the minimum merge size to be one byte larger than the largest job group in the WMBS instance and the max events to be one event larger than the total events in two of the groups. Verify that one merge job is produced with two of the job groups in it. """ self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=4097, max_merge_size=20000, max_merge_events=7169) assert len(result) == 1, \ "ERROR: More than one JobGroup returned." assert len(result[0].jobs) == 1, \ "ERROR: One job should have been returned." self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7) self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"}) goldenFilesA = ["file1", "file2", "file3", "file4"] goldenFilesB = ["fileA", "fileB", "fileC"] goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"] jobFiles = list(result[0].jobs)[0].getFiles() currentRun = 0 currentLumi = 0 currentEvent = 0 for fileObj in jobFiles: if fileObj["lfn"] in goldenFilesA: goldenFilesA.remove(fileObj["lfn"]) elif fileObj["lfn"] in goldenFilesB: goldenFilesB.remove(fileObj["lfn"]) elif fileObj["lfn"] in goldenFilesC: goldenFilesC.remove(fileObj["lfn"]) fileRun = list(fileObj["runs"])[0].run fileLumi = min(list(fileObj["runs"])[0]) fileEvent = fileObj["first_event"] if currentRun == 0: currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent continue assert fileRun >= currentRun, \ "ERROR: Files not sorted by run." if fileRun == currentRun: assert fileLumi >= currentLumi, \ "ERROR: Files not ordered by lumi" if fileLumi == currentLumi: assert fileEvent >= currentEvent, \ "ERROR: Files not ordered by first event" currentRun = fileRun currentLumi = fileLumi currentEvent = fileEvent assert len(goldenFilesB) == 0 and \ (len(goldenFilesA) == 0 or len(goldenFilesC) == 0), \ "ERROR: Files not allocated to jobs correctly." 
return def testParallelProcessing(self): """ _testParallelProcessing_ Verify that merging works correctly when multiple processing subscriptions are run over the same input files. The merging algorithm should ignore processing jobs that feed into different merge subscriptions. """ locationAction = self.daoFactory(classname="Locations.New") locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN") locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN") mergeFilesetA = Fileset(name="mergeFilesetA") mergeFilesetB = Fileset(name="mergeFilesetB") mergeFilesetA.create() mergeFilesetB.create() mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA") mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB") mergeMergedFilesetA.create() mergeMergedFilesetB.create() mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus", owner="Steve", task="Test") mergeWorkflow.create() mergeSubscriptionA = Subscription(fileset=mergeFilesetA, workflow=mergeWorkflow, split_algo="WMBSMergeBySize") mergeSubscriptionB = Subscription(fileset=mergeFilesetB, workflow=mergeWorkflow, split_algo="WMBSMergeBySize") mergeSubscriptionA.create() mergeSubscriptionB.create() inputFileset = Fileset(name="inputFileset") inputFileset.create() inputFileA = File(lfn="inputLFNA") inputFileB = File(lfn="inputLFNB") inputFileA.create() inputFileB.create() procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2", owner="Steve", task="Test") procWorkflowA.create() procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA) procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3", owner="Steve", task="Test2") procWorkflowB.create() procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB) procSubscriptionA = Subscription(fileset=inputFileset, workflow=procWorkflowA, split_algo="EventBased") procSubscriptionA.create() procSubscriptionB = Subscription(fileset=inputFileset, workflow=procWorkflowB, split_algo="EventBased") procSubscriptionB.create() jobGroupA = 
JobGroup(subscription=procSubscriptionA) jobGroupA.create() jobGroupB = JobGroup(subscription=procSubscriptionB) jobGroupB.create() changeStateDAO = self.daoFactory(classname="Jobs.ChangeState") testJobA = Job() testJobA.addFile(inputFileA) testJobA.create(jobGroupA) testJobA["state"] = "cleanout" testJobA["oldstate"] = "new" testJobA["couch_record"] = "somejive" testJobA["retry_count"] = 0 testJobA["outcome"] = "success" testJobA.save() testJobB = Job() testJobB.addFile(inputFileB) testJobB.create(jobGroupA) testJobB["state"] = "cleanout" testJobB["oldstate"] = "new" testJobB["couch_record"] = "somejive" testJobB["retry_count"] = 0 testJobB["outcome"] = "success" testJobB.save() testJobC = Job() testJobC.addFile(inputFileA) testJobC.create(jobGroupB) testJobC["state"] = "cleanout" testJobC["oldstate"] = "new" testJobC["couch_record"] = "somejive" testJobC["retry_count"] = 0 testJobC["outcome"] = "success" testJobC.save() testJobD = Job() testJobD.addFile(inputFileA) testJobD.create(jobGroupB) testJobD["state"] = "cleanout" testJobD["oldstate"] = "new" testJobD["couch_record"] = "somejive" testJobD["retry_count"] = 0 testJobD["outcome"] = "failure" testJobD.save() testJobE = Job() testJobE.addFile(inputFileB) testJobE.create(jobGroupB) testJobE["state"] = "cleanout" testJobE["oldstate"] = "new" testJobE["couch_record"] = "somejive" testJobE["retry_count"] = 0 testJobE["outcome"] = "success" testJobE.save() testJobF = Job() testJobF.addFile(inputFileB) testJobF.create(jobGroupB) testJobF["state"] = "cleanout" testJobF["oldstate"] = "new" testJobF["couch_record"] = "somejive" testJobF["retry_count"] = 0 testJobF["outcome"] = "failure" testJobF.save() changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD, testJobE, testJobF]) fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileA.addRun(Run(1, *[45])) fileA.create() fileA.addParent(inputFileA["lfn"]) fileB = File(lfn="fileB", size=1024, events=1024, first_event=0, 
locations={"T2_CH_CERN"}) fileB.addRun(Run(1, *[45])) fileB.create() fileB.addParent(inputFileB["lfn"]) jobGroupA.output.addFile(fileA) jobGroupA.output.addFile(fileB) jobGroupA.output.commit() mergeFilesetA.addFile(fileA) mergeFilesetA.addFile(fileB) mergeFilesetA.commit() fileC = File(lfn="fileC", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileC.addRun(Run(1, *[45])) fileC.create() fileC.addParent(inputFileA["lfn"]) fileD = File(lfn="fileD", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileD.addRun(Run(1, *[45])) fileD.create() fileD.addParent(inputFileB["lfn"]) jobGroupB.output.addFile(fileC) jobGroupB.output.addFile(fileD) mergeFilesetB.addFile(fileC) mergeFilesetB.addFile(fileD) mergeFilesetB.commit() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=mergeSubscriptionB) result = jobFactory(min_merge_size=1, max_merge_size=20000, max_merge_events=7169) assert len(result) == 0, \ "Error: No merge jobs should have been created." fileE = File(lfn="fileE", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileE.addRun(Run(1, *[45])) fileE.create() fileE.addParent(inputFileA["lfn"]) fileF = File(lfn="fileF", size=1024, events=1024, first_event=0, locations={"T2_CH_CERN"}) fileF.addRun(Run(1, *[45])) fileF.create() fileF.addParent(inputFileB["lfn"]) jobGroupB.output.addFile(fileE) jobGroupB.output.addFile(fileF) mergeFilesetB.addFile(fileE) mergeFilesetB.addFile(fileF) mergeFilesetB.commit() testJobD["outcome"] = "success" testJobD.save() testJobF["outcome"] = "success" testJobF.save() changeStateDAO.execute([testJobD, testJobF]) result = jobFactory(min_merge_size=1, max_merge_size=20000, max_merge_events=7169) assert len(result) == 1, \ "Error: One merge job should have been created: %s" % len(result) return def testLocationMerging(self): """ _testLocationMerging_ Verify that files residing on different SEs are not merged together in the same job. 
""" self.stuffWMBS() locationAction = self.daoFactory(classname="Locations.New") locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk") fileSite2 = File(lfn="fileSite2", size=4098, events=1024, first_event=0, locations={"T1_UK_RAL_Disk"}) fileSite2.addRun(Run(1, *[46])) fileSite2.create() fileSite2.addParent(self.parentFileSite2["lfn"]) self.mergeFileset.addFile(fileSite2) self.mergeFileset.commit() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=4097, max_merge_size=99999999, max_merge_events=999999999) assert len(result) == 1, \ "ERROR: More than one JobGroup returned." assert len(result[0].jobs) == 2, \ "ERROR: Two jobs should have been returned." ralJobs = 0 fnalcernJobs = 0 for job in result[0].jobs: if job["possiblePSN"] == {"T1_UK_RAL"}: ralJobs += 1 elif job["possiblePSN"] == {"T1_US_FNAL", "T2_CH_CERN"}: fnalcernJobs += 1 self.assertEqual(ralJobs, 1) self.assertEqual(fnalcernJobs, 1) return def testFilesetCloseout(self): """ _testFilesetCloseout_ Verify that the merge algorithm works correctly when it's input fileset is closed. The split algorithm should create merge jobs for all files regardless of size and then mark any orphaned files (files that are the result of a split by lumi / split by event where one of the parent processing jobs has failed while others have succeeded) as failed so that the fileset closing works. """ self.stuffWMBS() splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) # Get out all the good merge jobs out of the way. 
result = jobFactory(min_merge_size=1, max_merge_size=999999999999, max_merge_events=999999999) # Verify that the bad files are the only "available" files availableAction = self.daoFactory(classname="Subscriptions.GetAvailableFilesMeta") availFiles = availableAction.execute(self.mergeSubscription["id"]) assert len(availFiles) == 4, \ "Error: Wrong number of available files." goldenFiles = ["badFile1", "badFileA", "badFileB", "badFileC"] for availFile in availFiles: assert availFile["lfn"] in goldenFiles, \ "Error: Extra file is available." goldenFiles.remove(availFile["lfn"]) self.mergeFileset.markOpen(False) result = jobFactory(min_merge_size=1, max_merge_size=999999999999, max_merge_events=999999999) assert len(result) == 0, \ "Error: Merging should have returned zero jobs." self.mergeFileset.markOpen(False) availFiles2 = availableAction.execute(self.mergeSubscription["id"]) assert len(availFiles2) == 0, \ "Error: There should be no more available files." failedAction = self.daoFactory(classname="Subscriptions.GetFailedFiles") failedFiles = failedAction.execute(self.mergeSubscription["id"]) assert len(failedFiles) == 4, \ "Error: Wrong number of failed files: %s" % failedFiles goldenIDs = [] for availFile in availFiles: goldenIDs.append(availFile["id"]) for failedFile in failedFiles: assert failedFile["file"] in goldenIDs, \ "Error: Extra failed file." return def testFilesetCloseout2(self): """ _testFilesetCloseout2_ Verify that the fail orphan file code does not fail files that have failed for other workflows. """ self.stuffWMBS() self.mergeFileset.markOpen(False) splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) # Get out all the good merge jobs out of the way. 
result = jobFactory(min_merge_size=1, max_merge_size=999999999999, max_merge_events=999999999) self.assertEqual(len(result), 1, "Error: Wrong number of job groups.") self.assertEqual(len(result[0].jobs), 2, "Error: Wrong number of jobs.") failedAction = self.daoFactory(classname="Subscriptions.GetFailedFiles") failedFiles = failedAction.execute(self.mergeSubscription["id"]) self.assertEqual(len(failedFiles), 4, "Error: Wrong number of failed files: %s" % failedFiles) return def testForcedMerge(self): """ _testForcedMerge_ Repeat testMinMergeSize1a, but with non-injected files to assert that this causes no jobgroups to be created. """ self.stuffWMBS(injected=False) self.mergeFileset.markOpen(False) splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.mergeSubscription) result = jobFactory(min_merge_size=20000, max_merge_size=200000, max_merge_events=20000) self.assertEqual(len(result), 0) return
def stuffWMBS(self, injected=True):
    """
    _stuffWMBS_

    Insert a location, two filesets, a workflow, subscriptions and eleven
    dummy files into WMBS to test job creation.

    The files are all located at T1_US_FNAL_Disk and fall into three
    run/lumi groups: file1-file4 in run 1 lumi 45, fileA-fileC in run 1
    lumi 46 and fileI-fileIV in run 2 lumi 46.  Every file is added to both
    self.mergeFileset and self.bogusFileset.

    injected is the flag handed to the Workflow.MarkInjectedWorkflows DAO
    for the merge workflow; it defaults to True, which is the value that
    was previously hard-coded.
    """
    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")

    self.mergeFileset = Fileset(name="mergeFileset")
    self.mergeFileset.create()
    self.bogusFileset = Fileset(name="bogusFileset")
    self.bogusFileset.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                             owner="Steve", task="Test")
    mergeWorkflow.create()
    markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
    markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

    self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="ParentlessMergeBySize")
    self.mergeSubscription.create()
    # NOTE(review): bogusSubscription is constructed but never create()d
    # here; confirm that is intended.
    self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="ParentlessMergeBySize")

    # One row per file: (lfn, size, events, first_event, run, lumi).
    fileSpecs = [
        ("file1", 1024, 1024, 0, 1, 45),
        ("file2", 1024, 1024, 1024, 1, 45),
        ("file3", 1024, 1024, 2048, 1, 45),
        ("file4", 1024, 1024, 3072, 1, 45),
        ("fileA", 1024, 1024, 0, 1, 46),
        ("fileB", 1024, 1024, 1024, 1, 46),
        ("fileC", 1024, 1024, 2048, 1, 46),
        ("fileI", 1024, 1024, 0, 2, 46),
        ("fileII", 1024, 1024, 1024, 2, 46),
        ("fileIII", 1024, 102400, 2048, 2, 46),
        ("fileIV", 102400, 1024, 3072, 2, 46),
    ]

    # Create every file first, then attach them to the filesets, keeping
    # the original order of database operations.
    jobFiles = []
    for lfn, size, events, firstEvent, run, lumi in fileSpecs:
        newFile = File(lfn=lfn, size=size, events=events,
                       first_event=firstEvent,
                       locations={"T1_US_FNAL_Disk"})
        newFile.addRun(Run(run, lumi))
        newFile.create()
        jobFiles.append(newFile)

    for jobFile in jobFiles:
        self.mergeFileset.addFile(jobFile)
        self.bogusFileset.addFile(jobFile)

    self.mergeFileset.commit()
    self.bogusFileset.commit()

    return
def setupForKillTest(self, baAPI=None):
    """
    _setupForKillTest_

    Inject a workflow into WMBS that has a processing task, a merge task
    and a cleanup task.  Inject files into the various tasks at various
    processing states (acquired, complete, available...).  Also create jobs
    for each subscription in various states.

    The three subscriptions are stored on self as mainProcSub, mainMergeSub
    and mainCleanupSub, and their jobs as procJobA-C, mergeJobA-C and
    cleanupJobA-C.  An unrelated "Bogus" workflow with its own subscription
    (self.bogusSub) is created as well.

    baAPI is optional; when given, every job is tagged with plugin, user
    and location information and handed to baAPI.createNewJobs().
    """
    def newJob(name, state):
        # Build an (uncreated) job in the given state, located at site1.
        job = Job(name=name)
        job["state"] = state
        job["location"] = "site1"
        return job

    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    changeStateAction = daoFactory(classname="Jobs.ChangeState")
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', pnn='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertThreshold(siteName='site1', taskType='Processing',
                                    maxSlots=10000, pendingSlots=10000)

    userDN = 'someDN'
    userAction = daoFactory(classname="Users.New")
    userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT')

    inputFileset = Fileset("input")
    inputFileset.create()

    inputFileA = File("lfnA", locations="goodse.cern.ch")
    inputFileB = File("lfnB", locations="goodse.cern.ch")
    inputFileC = File("lfnC", locations="goodse.cern.ch")
    inputFileA.create()
    inputFileB.create()
    inputFileC.create()

    inputFileset.addFile(inputFileA)
    inputFileset.addFile(inputFileB)
    inputFileset.addFile(inputFileC)
    inputFileset.commit()

    unmergedOutputFileset = Fileset("unmerged")
    unmergedOutputFileset.create()

    unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
    unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
    unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
    unmergedFileA.create()
    unmergedFileB.create()
    unmergedFileC.create()

    unmergedOutputFileset.addFile(unmergedFileA)
    unmergedOutputFileset.addFile(unmergedFileB)
    unmergedOutputFileset.addFile(unmergedFileC)
    unmergedOutputFileset.commit()

    # Three tasks of the same "Main" workflow.
    mainProcWorkflow = Workflow(spec="spec1", owner="Steve",
                                name="Main", task="Proc")
    mainProcWorkflow.create()
    mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve",
                                     name="Main", task="ProcMerge")
    mainProcMergeWorkflow.create()
    mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve",
                                   name="Main", task="Cleanup")
    mainCleanupWorkflow.create()

    # Processing: file A acquired, file B completed, file C left untouched.
    self.mainProcSub = Subscription(fileset=inputFileset,
                                    workflow=mainProcWorkflow,
                                    type="Processing")
    self.mainProcSub.create()
    self.mainProcSub.acquireFiles(inputFileA)
    self.mainProcSub.completeFiles(inputFileB)

    procJobGroup = JobGroup(subscription=self.mainProcSub)
    procJobGroup.create()
    self.procJobA = newJob("ProcJobA", "new")
    self.procJobB = newJob("ProcJobB", "executing")
    self.procJobC = newJob("ProcJobC", "complete")
    self.procJobA.create(procJobGroup)
    self.procJobB.create(procJobGroup)
    self.procJobC.create(procJobGroup)

    # Merge: file A acquired, file B failed, file C left untouched.
    self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                     workflow=mainProcMergeWorkflow,
                                     type="Merge")
    self.mainMergeSub.create()
    self.mainMergeSub.acquireFiles(unmergedFileA)
    self.mainMergeSub.failFiles(unmergedFileB)

    mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
    mergeJobGroup.create()
    self.mergeJobA = newJob("MergeJobA", "exhausted")
    self.mergeJobB = newJob("MergeJobB", "cleanout")
    self.mergeJobC = newJob("MergeJobC", "new")
    self.mergeJobA.create(mergeJobGroup)
    self.mergeJobB.create(mergeJobGroup)
    self.mergeJobC.create(mergeJobGroup)

    # Cleanup: file A acquired, file B completed, file C left untouched.
    self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                       workflow=mainCleanupWorkflow,
                                       type="Cleanup")
    self.mainCleanupSub.create()
    self.mainCleanupSub.acquireFiles(unmergedFileA)
    self.mainCleanupSub.completeFiles(unmergedFileB)

    cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
    cleanupJobGroup.create()
    self.cleanupJobA = newJob("CleanupJobA", "new")
    self.cleanupJobB = newJob("CleanupJobB", "executing")
    self.cleanupJobC = newJob("CleanupJobC", "complete")
    self.cleanupJobA.create(cleanupJobGroup)
    self.cleanupJobB.create(cleanupJobGroup)
    self.cleanupJobC.create(cleanupJobGroup)

    jobList = [self.procJobA, self.procJobB, self.procJobC,
               self.mergeJobA, self.mergeJobB, self.mergeJobC,
               self.cleanupJobA, self.cleanupJobB, self.cleanupJobC]

    # Hand the states set on the job objects to the Jobs.ChangeState DAO.
    changeStateAction.execute(jobList)

    if baAPI:
        for job in jobList:
            job['plugin'] = 'TestPlugin'
            job['userdn'] = userDN
            job['usergroup'] = 'DEFAULT'
            job['userrole'] = 'DEFAULT'
            job['custom']['location'] = 'site1'
        baAPI.createNewJobs(wmbsJobs=jobList)

    # We'll create an unrelated workflow to verify that it isn't affected
    # by the killing code.
    bogusFileset = Fileset("dontkillme")
    bogusFileset.create()

    bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
    bogusFileA.create()
    bogusFileset.addFile(bogusFileA)
    bogusFileset.commit()

    bogusWorkflow = Workflow(spec="spec2", owner="Steve",
                             name="Bogus", task="Proc")
    bogusWorkflow.create()
    self.bogusSub = Subscription(fileset=bogusFileset,
                                 workflow=bogusWorkflow,
                                 type="Processing")
    self.bogusSub.create()
    self.bogusSub.acquireFiles(bogusFileA)

    return
def testRetryCount(self):
    """
    _testRetryCount_

    Check the retry counter handling of ChangeState.persist(): a job moved
    to "created" from "submitcooloff" or from "jobcooloff" must end up with
    a retry count of 1, while jobs moved to "new" from "none" must stay at
    0.
    """
    stateChanger = ChangeState(self.config, "changestate_t")

    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute("site1", pnn="T2_CH_CERN")

    workflow = Workflow(spec=self.specUrl, owner="Steve",
                        name="wf001", task=self.taskName)
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    # Four files, split one per job below, give the four jobs under test.
    for index in range(4):
        wmbsFile = File(lfn="File%s" % index, locations={"T2_CH_CERN"})
        wmbsFile.create()
        fileset.addFile(wmbsFile)

    fileset.commit()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                split_algo="FileBased")
    subscription.create()

    makeJobs = SplitterFactory()(package="WMCore.WMBS",
                                 subscription=subscription)
    jobGroup = makeJobs(files_per_job=1)[0]

    assert len(jobGroup.jobs) == 4, \
        "Error: Splitting should have created four jobs."

    jobA, jobB, jobC, jobD = [jobGroup.jobs[position] for position in range(4)]
    allJobs = (jobA, jobB, jobC, jobD)

    for job in allJobs:
        job["user"] = "******"
        job["group"] = "DMWM"
        job["taskType"] = "Processing"

    stateChanger.persist([jobA], "created", "submitcooloff")
    stateChanger.persist([jobB], "created", "jobcooloff")
    stateChanger.persist([jobC, jobD], "new", "none")

    # Reload from the database so the persisted counters are what we check.
    for job in allJobs:
        job.load()

    for job, expectedRetries in zip(allJobs, (1, 1, 0, 0)):
        assert job["retry_count"] == expectedRetries, \
            "Error: Retry count is wrong."

    return
def testAnalysis(self):
    """
    _testAnalysis_

    Build an Analysis workload, install it into WMBS through WMBSHelper
    and check the resulting workflow output map, the top level Analysis
    subscription and the LogCollect subscription.
    """
    defaultArguments = getTestArguments()
    defaultArguments.update({
        "CouchUrl": os.environ["COUCHURL"],
        "CouchDBName": "analysis_t",
        "AnalysisConfigCacheDoc": self.injectAnalysisConfig(),
        "ProcessingVersion": 1,
    })

    workloadFactory = AnalysisWorkloadFactory()
    testWorkload = workloadFactory("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    wmbsHelper = WMBSHelper(testWorkload, "Analysis", "SomeBlock")
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                  wmbsHelper.topLevelFileset)

    analysisWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Analysis")
    analysisWorkflow.load()

    outputCount = len(analysisWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 2,
                     "Error: Wrong number of WF outputs.")

    # Actually Analysis does not have a merge task, so the "merged" and
    # the unmerged fileset are expected to carry the same unmerged name.
    mergedLogArchive = analysisWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchive = analysisWorkflow.outputMap["logArchive"][0]["output_fileset"]
    mergedLogArchive.loadData()
    unmergedLogArchive.loadData()

    expectedLogArchive = "/TestWorkload/Analysis/unmerged-logArchive"
    self.assertEqual(mergedLogArchive.name, expectedLogArchive,
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchive.name, expectedLogArchive,
                     "Error: LogArchive output fileset is wrong.")

    unmergedFileset = analysisWorkflow.outputMap["output"][0]["output_fileset"]
    mergedFileset = analysisWorkflow.outputMap["output"][0]["merged_output_fileset"]
    unmergedFileset.loadData()
    mergedFileset.loadData()

    expectedOutput = "/TestWorkload/Analysis/unmerged-output"
    self.assertEqual(unmergedFileset.name, expectedOutput,
                     "Error: Unmerged output fileset is wrong: " + unmergedFileset.name)
    self.assertEqual(mergedFileset.name, expectedOutput,
                     "Error: Unmerged output fileset is wrong: " + mergedFileset.name)

    # Top level subscription feeding the Analysis task.
    inputFileset = Fileset(name="TestWorkload-Analysis-SomeBlock")
    inputFileset.loadData()

    analysisSub = Subscription(fileset=inputFileset,
                               workflow=analysisWorkflow)
    analysisSub.loadData()

    self.assertEqual(analysisSub["type"], "Analysis",
                     "Error: Wrong subscription type.")
    self.assertEqual(analysisSub["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription on the unmerged logArchive fileset.
    logArchiveFileset = Fileset(name="/TestWorkload/Analysis/unmerged-logArchive")
    logArchiveFileset.loadData()
    logCollectWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Analysis/LogCollect")
    logCollectWorkflow.load()

    logCollectSub = Subscription(fileset=logArchiveFileset,
                                 workflow=logCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testDuplicateJobReports(self):
    """
    _testDuplicateJobReports_

    Verify that everything works correctly if a job report is added to the
    database more than once.

    A single job is created, a framework job report is attached to it and
    the same created -> executing transition is propagated twice.  The
    fwjrs couch database must then contain exactly two rows in
    ``_all_docs``.
    """
    change = ChangeState(self.config, "changestate_t")

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute("site1", pnn="T2_CH_CERN")

    testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                            name="wf001", task=self.taskName)
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
    testFile.create()
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    # Use the TestCase assertion rather than a bare ``assert`` so the check
    # still runs under ``python -O`` and reports the actual job count.
    self.assertEqual(len(jobGroup.jobs), 1,
                     "Error: Splitting should have created one job.")

    testJobA = jobGroup.jobs[0]
    testJobA["user"] = "******"
    testJobA["group"] = "DMWM"
    testJobA["taskType"] = "Processing"

    change.propagate([testJobA], 'created', 'new')

    myReport = Report()
    reportPath = os.path.join(getTestBase(),
                              "WMCore_t/JobStateMachine_t/Report.pkl")
    myReport.unpersist(reportPath)
    testJobA["fwjr"] = myReport

    # Propagate the very same transition twice: this is the duplicate
    # report submission the test is about.
    change.propagate([testJobA], 'executing', 'created')
    change.propagate([testJobA], 'executing', 'created')

    changeStateDB = self.couchServer.connectDatabase(dbname="changestate_t/fwjrs")
    allDocs = changeStateDB.document("_all_docs")

    self.assertEqual(len(allDocs["rows"]), 2,
                     "Error: Wrong number of documents")

    # Fetch the first row that is not the design document; the returned
    # document itself is not inspected.
    for resultRow in allDocs["rows"]:
        if resultRow["id"] != "_design/FWJRDump":
            changeStateDB.document(resultRow["id"])
            break

    return
def testPromptRecoWithSkims(self):
    """
    _testPromptRecoWithSkims_

    Create a T1 Prompt Reconstruction workflow with PromptSkims and
    verify it installs into WMBS correctly.

    The test checks, in order:
      * the output maps of the Reco, AlcaSkim, DQM harvesting and merge
        workflows,
      * the processing, AlcaSkim, harvesting and merge subscriptions,
      * the cleanup subscriptions,
      * the LogCollect subscriptions of every task.
    """
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["EnableHarvesting"] = True

    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                       testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Output modules (mapped to their data tier) that get a RecoMerge
    # task, and the AlcaSkim stream output modules.  Both are reused by
    # several sections below.
    mergedRecoMods = {
        "write_RECO": "RECO",
        "write_AOD": "AOD",
        "write_DQM": "DQM"
    }
    alcaStreamMods = ["ALCARECOStream%s" % alcaProd
                      for alcaProd in testArguments["AlcaSkims"]]

    # ---- Reco workflow outputs ----
    recoWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = {
        "write_RECO": "RECO",
        "write_ALCARECO": "ALCARECO",
        "write_AOD": "AOD",
        "write_DQM": "DQM"
    }
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # The ALCARECO output is excluded from the merged-name check.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Reco/unmerged-%s" % fset,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # ---- AlcaSkim workflow outputs ----
    alcaSkimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    for goldenOutputMod in alcaStreamMods:
        fset = goldenOutputMod + "ALCARECO"
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # ---- DQM harvesting workflow outputs ----
    dqmWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # ---- Reco merge workflow outputs ----
    for goldenOutputMod, tier in mergedRecoMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
            "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # ---- AlcaSkim merge workflow outputs ----
    for goldenOutputMod in alcaStreamMods:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(
            len(mergeWorkflow.outputMap.keys()), 2,
            "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

        mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # ---- Processing / skim / harvesting subscriptions ----
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()

    recoSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=recoWorkflow)
    recoSubscription.loadData()

    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(
        recoSubscription["split_algo"], "EventAwareLumiBased",
        "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
    alcaRecoFileset.loadData()

    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                        workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()

    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(
        alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
        "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
    mergedDQMFileset.loadData()

    dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                   workflow=dqmWorkflow)
    dqmSubscription.loadData()

    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # ---- Merge subscriptions ----
    for unmergedOutput, tier in mergedRecoMods.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    for unmergedOutput in alcaStreamMods:
        unmergedAlcaSkim = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                         workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # ---- Cleanup subscriptions ----
    for goldenOutputMod, tier in mergedRecoMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        # The failure message names the split algorithm, which is what
        # this assertion actually checks.
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreamMods:
        unmergedFileset = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"],
                         "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # ---- LogCollect subscriptions ----
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect,
                                 workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                 workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    for goldenOutputMod in ["write_RECO", "write_AOD", "write_DQM"]:
        recoMergeLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(
            fileset=recoMergeLogCollect,
            workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()

        # Check the subscription loaded in this iteration, not the
        # ``logCollectSub`` left over from the AlcaSkim section above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    for goldenOutputMod in alcaStreamMods:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(
            fileset=alcaSkimLogCollect,
            workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()

        # Same as above: assert on the subscription just loaded.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()

    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
class RepackMergeTest(unittest.TestCase): """ _RepackMergeTest_ Test for RepackMerge job splitter """ def setUp(self): """ _setUp_ """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["T0.WMBS"]) self.splitterFactory = SplitterFactory(package = "T0.JobSplitting") myThread = threading.currentThread() daoFactory = DAOFactory(package = "T0.WMBS", logger = logging, dbinterface = myThread.dbi) wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS", logger = logging, dbinterface = myThread.dbi) myThread.dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN') """, transaction = False) myThread.dbi.processData("""INSERT INTO wmbs_location_pnns (location, pnn) VALUES (1, 'SomePNN2') """, transaction = False) insertRunDAO = daoFactory(classname = "RunConfig.InsertRun") insertRunDAO.execute(binds = { 'RUN' : 1, 'HLTKEY' : "someHLTKey" }, transaction = False) insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection") insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 1 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 2 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 3 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 4 }, transaction = False) insertLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 5 }, transaction = False) insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream") insertStreamDAO.execute(binds = { 'STREAM' : "A" }, transaction = False) insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion") insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" }, transaction = False) insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion") 
insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1, 'STREAM' : 'A', 'VERSION' : "CMSSW_4_2_7" }, transaction = False) insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer") insertStreamerDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 4, 'STREAM' : "A", 'LFN' : "/testLFN/A", 'FILESIZE' : 100, 'EVENTS' : 100, 'TIME' : int(time.time()) }, transaction = False) insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset") insertStreamFilesetDAO.execute(1, "A", "TestFileset1") self.fileset1 = Fileset(name = "TestFileset1") self.fileset2 = Fileset(name = "TestFileset2") self.fileset1.load() self.fileset2.create() workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test") workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test") workflow1.create() workflow2.create() self.subscription1 = Subscription(fileset = self.fileset1, workflow = workflow1, split_algo = "Repack", type = "Repack") self.subscription2 = Subscription(fileset = self.fileset2, workflow = workflow2, split_algo = "RepackMerge", type = "RepackMerge") self.subscription1.create() self.subscription2.create() myThread.dbi.processData("""INSERT INTO wmbs_workflow_output (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET) VALUES (%d, 'SOMEOUTPUT', %d) """ % (workflow1.id, self.fileset2.id), transaction = False) # keep for later self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis") self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi") self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers") self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles") self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles") self.currentTime = int(time.time()) # default split parameters self.splitArgs = {} self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024 self.splitArgs['maxInputSize'] = 4.0 * 1024 
* 1024 * 1024 self.splitArgs['maxInputEvents'] = 100000000 self.splitArgs['maxInputFiles'] = 1000 self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024 self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024 return def tearDown(self): """ _tearDown_ """ self.testInit.clearDatabase() return def deleteSplitLumis(self): """ _deleteSplitLumis_ """ myThread = threading.currentThread() myThread.dbi.processData("""DELETE FROM lumi_section_split_active """, transaction = False) return def test00(self): """ _test00_ Test that the job name prefix feature works Test max edm size threshold for single lumi small lumi, followed by over-large lumi expect 1 job for small lumi and 4 jobs for over-large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2 * lumi): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxEdmSize'] = 13000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 3, "ERROR: JobFactory didn't create three jobs") job = jobGroups[0].jobs[0] self.assertTrue(job['name'].startswith("RepackMerge-"), "ERROR: Job has wrong name") self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 3, "ERROR: Job does not process 3 files") job = jobGroups[0].jobs[2] self.assertEqual(len(job.getFiles()), 1, "ERROR: Job does not process 1 file") return def test01(self): """ _test01_ Test max size threshold for single lumi small lumi, followed by large lumi expect 
1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test02(self): """ _test02_ Test max event threshold for single lumi small lumi, followed by large lumi expect 1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100 * lumi) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 300 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] 
self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test03(self): """ _test03_ Test max input files threshold for single lumi small lumi, followed by large lumi expect 1 job for small lumi and 1 job for large """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(lumi * 2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 3 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") job = jobGroups[0].jobs[1] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return def test04(self): """ _test04_ Test max size threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") 
mySplitArgs['maxInputSize'] = 5000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test05(self): """ _test05_ Test max event threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputEvents'] = 500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 
2, "ERROR: Job does not process 2 files") return def test06(self): """ _test06_ Test max input files threshold for multi lumi 3 same size lumis """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 3]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") mySplitArgs['maxInputFiles'] = 5 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test07(self): """ _test07_ Test over merge one small lumi, one large lumi (small below min size, large below max size, but both together above max size) """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 
mySplitArgs['maxInputSize'] = 9000 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return def test08(self): """ _test08_ Test under merge (over merge size threshold) one small lumi, one large lumi (small below min size, large below max size, but both together above max size) """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 mySplitArgs['maxInputSize'] = 9000 mySplitArgs['maxOverSize'] = 9500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test09(self): """ _test09_ Test under merge (over merge event threshold) one small lumi, one large lumi (small below min size, large below max size, but both together above max size) """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2]: for i in range(2): newFile = 
File(makeUUID(), size = 1000 * lumi * lumi, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['minInputSize'] = 3000 mySplitArgs['maxInputSize'] = 9000 mySplitArgs['maxInputEvents'] = 300 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") self.fileset2.markOpen(False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 2, "ERROR: Job does not process 2 files") return def test10(self): """ _test10_ Test merging of multiple lumis with holes in the lumi sequence Hole is due to no streamer files for the lumi Multi lumi input """ mySplitArgs = self.splitArgs.copy() for lumi in [1, 2, 5]: for i in range(2): newFile = File(makeUUID(), size = 1000, events = 100) newFile.addRun(Run(1, *[lumi])) newFile.setLocation("SomePNN", immediateSave = False) newFile.create() self.fileset2.addFile(newFile) self.fileset2.commit() jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription2) mySplitArgs['maxInputEvents'] = 500 jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 3, 'STREAM' : "A", 'FILECOUNT' : 0, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime }, transaction = False) jobGroups = 
jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1, 'LUMI' : 4, 'STREAM' : "A", 'FILECOUNT' : 1, 'INSERT_TIME' : self.currentTime, 'CLOSE_TIME' : self.currentTime }, transaction = False) self.feedStreamersDAO.execute(transaction = False) self.fileset1.loadData() jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") for fileid in self.fileset1.getFiles(type = 'id'): self.acquireFilesDAO.execute(self.subscription1['id'], fileid, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup") for fileid in self.fileset1.getFiles(type = 'id'): self.completeFilesDAO.execute(self.subscription1['id'], fileid, transaction = False) jobGroups = jobFactory(**mySplitArgs) self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, "ERROR: JobFactory didn't create one job") job = jobGroups[0].jobs[0] self.assertEqual(len(job.getFiles()), 4, "ERROR: Job does not process 4 files") return
# Build the workload and record where its spec will be saved.
workload = factory(workloadName, arguments)
workloadPath = os.path.join(workloadName, workloadFile)
workload.setOwner("*****@*****.**")
workload.setSpecUrl(workloadPath)

# Build a sandbox using TaskMaker. Subscription creation is switched off on
# the TaskMaker; the input filesets are injected by hand below.
sandboxDir = os.path.join(os.getcwd(), workloadName)
taskMaker = TaskMaker(workload, sandboxDir)
taskMaker.skipSubscription = True
taskMaker.processWorkload()
workload.save(workloadPath)

# Sites at which every virtual input file is registered.
virtualInputLocations = ("cmssrm.fnal.gov",
                         "storm-fe-cms.cr.cnaf.infn.it",
                         "cmssrm-fzk.gridka.de",
                         "srm2.grid.sinica.edu.tw",
                         "srm-cms.gridpp.rl.ac.uk",
                         "ccsrm.in2p3.fr",
                         "srmcms.pic.es")

# The transaction opened here is not committed within this excerpt.
myThread = threading.currentThread()
myThread.transaction.begin()

# Give every task an input fileset named after its path, holding a single
# zero-size unmerged file with numEvents events in run 1, lumi 1.
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name=workloadTask.getPathName())
    inputFileset.create()

    virtualFile = File(lfn="%s-virtual-input" % workloadTask.getPathName(),
                       size=0,
                       events=numEvents,
                       locations=set(virtualInputLocations),
                       merged=False)

    myRun = Run(runNumber=1)
    myRun.appendLumi(1)
    virtualFile.addRun(myRun)
    virtualFile.create()

    inputFileset.addFile(virtualFile)
    inputFileset.commit()
def createJobs(self):
    """
    _createJobs_

    Create test jobs in WMBS and BossAir.

    One workflow, three filesets sharing a single file, three subscriptions
    (two "Processing", one "Merge") and one job group per subscription are
    created. Ten jobs are then spread over the job groups, two of them
    ("executing") with a matching RunJob, and six of them are assigned a
    location.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="tapas",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    # Three filesets, all created before the shared file is added to them.
    filesets = []
    for filesetName in ("TestFilesetA", "TestFilesetB", "TestFilesetC"):
        newFileset = Fileset(name=filesetName)
        newFileset.create()
        filesets.append(newFileset)
    testFilesetA, testFilesetB, testFilesetC = filesets

    testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    testFileA.create()
    for existingFileset in filesets:
        existingFileset.addFile(testFileA)
        existingFileset.commit()

    # Subscription A white-lists testSite1, subscription B black-lists it and
    # the merge subscription C has no site list.
    testSubscriptionA = Subscription(fileset=testFilesetA,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionA.create()
    testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1",
                                          "valid": True}])

    testSubscriptionB = Subscription(fileset=testFilesetB,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionB.create()
    testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1",
                                          "valid": False}])

    testSubscriptionC = Subscription(fileset=testFilesetC,
                                     workflow=testWorkflow,
                                     type="Merge")
    testSubscriptionC.create()

    jobGroups = []
    for subscription in (testSubscriptionA, testSubscriptionB,
                         testSubscriptionC):
        newGroup = JobGroup(subscription=subscription)
        newGroup.create()
        jobGroups.append(newGroup)
    testJobGroupA, testJobGroupB, testJobGroupC = jobGroups

    # One row per job:
    #   (name, job group, state, RunJob status or None, location or None)
    # NOTE(review): the earlier inline comments labelled jobs D-F as
    # "Site 2" and G-J as "Site 3", yet only testJobE is located at
    # testSite2 and no testSite3 is ever used -- confirm the intent.
    jobPlan = [
        ("testJobA", testJobGroupA, "success", None, "testSite1"),
        ("testJobB", testJobGroupA, "executing", "PEND", "testSite1"),
        ("testJobC", testJobGroupA, "new", None, None),
        ("testJobD", testJobGroupB, "success", None, "testSite1"),
        ("testJobE", testJobGroupB, "executing", "RUN", "testSite2"),
        ("testJobF", testJobGroupB, "new", None, None),
        ("testJobG", testJobGroupC, "cleanout", None, "testSite1"),
        ("testJobH", testJobGroupC, "new", None, "testSite1"),
        ("testJobI", testJobGroupC, "new", None, None),
        ("testJobJ", testJobGroupC, "cleanout", None, None),
    ]

    createdJobs = []
    runJobs = []
    for jobName, jobGroup, jobState, runStatus, _ in jobPlan:
        newJob = Job(name=jobName, files=[testFileA])
        newJob["couch_record"] = makeUUID()
        if jobName == "testJobB":
            # Only this job is given a cache directory.
            newJob["cache_dir"] = self.tempDir
        newJob.create(group=jobGroup)
        newJob["state"] = jobState

        if runStatus is not None:
            # Jobs with a batch status get a RunJob built from them.
            newRunJob = RunJob()
            newRunJob.buildFromJob(newJob)
            newRunJob["status"] = runStatus
            runJobs.append(newRunJob)

        createdJobs.append(newJob)

    # Persist the states assigned above, then insert the RunJobs.
    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=createdJobs)

    self.insertRunJob.execute(runJobs)

    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    for (_, _, _, _, siteName), locatedJob in zip(jobPlan, createdJobs):
        if siteName is not None:
            setLocationAction.execute(locatedJob["id"], siteName)
myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"]) for lumi in dbsResult["LumiList"]: myRun.lumis.append(lumi["LumiSectionNumber"]) myFile.addRun(myRun) myFile.create() inputFileset.addFile(myFile) if len(inputFileset) < 1: raise Exception("No files were selected!") inputFileset.commit() inputFileset.markOpen(False) return myThread = threading.currentThread() myThread.transaction.begin() for workloadTask in workload.taskIterator(): inputFileset = Fileset(name=workloadTask.getPathName()) inputFileset.create() inputDataset = workloadTask.inputDataset() inputDatasetPath = "/%s/%s/%s" % ( inputDataset.primary, inputDataset.processed, inputDataset.tier) injectFilesFromDBS(inputFileset, inputDatasetPath, options.RunWhitelist) myWMBSHelper = WMBSHelper(workload) myWMBSHelper._createSubscriptionsInWMBS(workloadTash.getPathName()) myThread.transaction.commit()
class SplitFileBasedTest(unittest.TestCase):
    """
    _SplitFileBasedTest_

    Unit tests for the split file job splitting algorithm.
    """

    def setUp(self):
        """
        _setUp_

        Connect to the database, install the WMBS schema and keep a DAO
        factory for the WMBS package on the test case.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        thisThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=thisThread.logger,
                                     dbinterface=thisThread.dbi)

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert dummy jobs, job groups, filesets, files and subscriptions into
        WMBS to test job creation.

        Two job groups are created on an input subscription. Six jobs in
        state "cleanout" are attached to them; four have outcome "success"
        and two have outcome "failure". Fifteen output files, each with one
        of five parent files, are added to the job group outputs and to both
        the "mergeFileset" and the "bogusFileset". Only the merge
        subscription is created in the database; the bogus subscription is
        merely instantiated.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                                 owner="Steve", task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("someOutput", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("someOutput2", self.bogusFileset,
                                self.bogusMergedFileset)

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()

        def injectCleanoutJob(parentFile, jobGroup, outcome):
            """Create a job over parentFile in jobGroup, in state cleanout."""
            cleanoutJob = Job()
            cleanoutJob.addFile(parentFile)
            cleanoutJob.create(jobGroup)
            cleanoutJob["state"] = "cleanout"
            cleanoutJob["oldstate"] = "new"
            cleanoutJob["couch_record"] = "somejive"
            cleanoutJob["retry_count"] = 0
            cleanoutJob["outcome"] = outcome
            cleanoutJob.save()
            changeStateDAO.execute([cleanoutJob])
            return cleanoutJob

        def injectOutputFile(lfn, firstEvent, run, lumi, parentFile,
                             size=1024, events=1024):
            """Create a file at T2_CH_CERN and attach it to parentFile."""
            outputFile = File(lfn=lfn, size=size, events=events,
                              first_event=firstEvent,
                              locations=set(["T2_CH_CERN"]))
            outputFile.addRun(Run(run, *[lumi]))
            outputFile.create()
            outputFile.addParent(parentFile["lfn"])
            return outputFile

        parentFiles = []
        for parentLfn in ("parentFile1", "parentFile2",
                          "parentFile3", "parentFile4"):
            newParent = File(lfn=parentLfn)
            newParent.create()
            parentFiles.append(newParent)
        parentFile1, parentFile2, parentFile3, parentFile4 = parentFiles

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()

        injectCleanoutJob(parentFile1, jobGroup1, "success")
        injectCleanoutJob(parentFile2, jobGroup1, "success")
        injectCleanoutJob(parentFile3, jobGroup2, "success")
        injectCleanoutJob(parentFile4, jobGroup2, "failure")

        # We'll simulate a failed split by event job that the merger should
        # ignore: two jobs over the same parent, one of which failed.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()
        injectCleanoutJob(parentFile5, jobGroup2, "success")
        injectCleanoutJob(parentFile5, jobGroup2, "failure")

        badFile1 = injectOutputFile("badFile1", 0, 1, 45, parentFile5,
                                    size=10241024, events=10241024)

        # Output of job group 1: run 1 lumi 45 and run 1 lumi 46.
        groupOneFiles = [
            injectOutputFile("file1", 0, 1, 45, parentFile1),
            injectOutputFile("file2", 1024, 1, 45, parentFile1),
            injectOutputFile("file3", 2048, 1, 45, parentFile1),
            injectOutputFile("file4", 3072, 1, 45, parentFile1),
            injectOutputFile("fileA", 0, 1, 46, parentFile2),
            injectOutputFile("fileB", 1024, 1, 46, parentFile2),
            injectOutputFile("fileC", 2048, 1, 46, parentFile2),
        ]

        # Output of job group 2: run 2 lumi 46, plus the children of the
        # parent whose job has outcome "failure".
        groupTwoFiles = [
            injectOutputFile("fileI", 0, 2, 46, parentFile3),
            injectOutputFile("fileII", 1024, 2, 46, parentFile3),
            injectOutputFile("fileIII", 2048, 2, 46, parentFile3),
            injectOutputFile("fileIV", 3072, 2, 46, parentFile3),
            injectOutputFile("badFileA", 0, 1, 47, parentFile4),
            injectOutputFile("badFileB", 1024, 1, 47, parentFile4),
            injectOutputFile("badFileC", 2048, 1, 47, parentFile4),
        ]

        for outputFile in groupOneFiles:
            jobGroup1.output.addFile(outputFile)
        jobGroup1.output.commit()

        for outputFile in groupTwoFiles + [badFile1]:
            jobGroup2.output.addFile(outputFile)
        jobGroup2.output.commit()

        for outputFile in groupOneFiles + groupTwoFiles + [badFile1]:
            self.mergeFileset.addFile(outputFile)
            self.bogusFileset.addFile(outputFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

    def testSplitAlgo(self):
        """
        _testSplitAlgo_

        Run the SplitFileBased splitting algorithm over the data created in
        the merge subscription. This should produce three job groups each
        containing one job. The files in each job should be ordered by run,
        then lumi, then first event.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)
        jobGroups = jobFactory()

        assert len(jobGroups) == 3, \
            "ERROR: Wrong number of job groups returned: %s" % len(jobGroups)

        for jobGroup in jobGroups:
            assert len(jobGroup.jobs) == 1, \
                "ERROR: One job should be in a job group."

        # Expected LFNs per job; entries are removed as they are seen.
        expectedA = ["file1", "file2", "file3", "file4"]
        expectedB = ["fileA", "fileB", "fileC"]
        expectedC = ["fileI", "fileII", "fileIII", "fileIV"]

        for jobGroup in jobGroups:
            jobFiles = jobGroup.jobs.pop().getFiles()

            # The first file decides which expected list the job maps to.
            firstLfn = jobFiles[0]["lfn"]
            if firstLfn in expectedA:
                outstanding = expectedA
            elif firstLfn in expectedB:
                outstanding = expectedB
            else:
                outstanding = expectedC

            # A previous run of 0 marks "no file seen yet".
            previousRun = previousLumi = previousEvent = 0

            for mergeFile in jobFiles:
                mergeFile.loadData()
                assert mergeFile["lfn"] in outstanding, \
                    "Error: Unknown file in merge jobs."
                assert len(mergeFile["locations"]) == 1, \
                    "Error: Wrong number of file locations."
                assert "T2_CH_CERN" in mergeFile["locations"], \
                    "Error: File is missing a location."

                outstanding.remove(mergeFile["lfn"])

                firstRun = list(mergeFile["runs"])[0]
                thisRun = firstRun.run
                thisLumi = min(firstRun)
                thisEvent = mergeFile["first_event"]

                if previousRun != 0:
                    assert thisRun >= previousRun, \
                        "ERROR: Files not sorted by run."
                    if thisRun == previousRun:
                        assert thisLumi >= previousLumi, \
                            "ERROR: Files not ordered by lumi"
                        if thisLumi == previousLumi:
                            assert thisEvent > previousEvent, \
                                "ERROR: Files not ordered by first event"

                previousRun = thisRun
                previousLumi = thisLumi
                previousEvent = thisEvent

        assert not expectedA and not expectedB and not expectedC, \
            "ERROR: Files missing from merge jobs."
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test',
                       fwjrPath=None, workloadName=None, fileModifier=''):
    """
    Creates a group of several jobs

    A workflow, a fileset, a subscription and a job group are created inside
    one transaction, together with one parent file and two unmerged child
    files. Every job gets both child files, a mask over run 10 and its own
    cache directory below self.testDir.

    :param nJobs: number of jobs to create in the group
    :param retry_count: value stored as 'retry_count' on every job
    :param workloadPath: used as the spec of the workflow
    :param fwjrPath: value stored as 'fwjr_path' on every job
    :param workloadName: name of the workflow; when None (the default) a
        fresh UUID is generated for each call
    :param fileModifier: suffix appended to the LFN of every file
    :return: the committed JobGroup
    """
    # The default used to be `makeUUID()` in the signature, which Python
    # evaluates once at definition time: every defaulted call then shared the
    # same "unique" workflow name. Resolve it per call instead.
    if workloadName is None:
        workloadName = makeUUID()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops",
                            group="cmsdataops", name=workloadName,
                            task="/TestWorkload/ReReco")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    parentLfn = "/this/is/a/parent%s" % fileModifier

    testFile0 = File(lfn=parentLfn, size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('T2_CH_CERN')

    testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024,
                     events=10, first_event=88, merged=False)
    testFileA.addRun(Run(10, *[12312, 12313]))
    testFileA.setLocation('T2_CH_CERN')

    testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024,
                     events=10, first_event=88, merged=False)
    testFileB.addRun(Run(10, *[12314, 12315, 12316]))
    testFileB.setLocation('T2_CH_CERN')

    # The parent has to exist before the children can point at it.
    testFile0.create()
    testFileA.create()
    testFileB.create()
    testFileA.addParent(lfn=parentLfn)
    testFileB.addParent(lfn=parentLfn)

    for _ in range(nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
        testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        testJob['fwjr_path'] = fwjrPath
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
def testGetFinishedWorkflows(self):
    """
    _testGetFinishedWorkflows_

    Test that we get only those workflows which are finished, that is,
    workflows where all its subscriptions are finished and all other
    workflows with the same spec are finished too
    """
    owner = "no-one"

    # 100 workflow rows: ten spec/name pairs (index modulo 10), each with ten
    # distinct tasks.
    workflows = []
    for taskIndex in range(0, 100):
        pairIndex = taskIndex % 10
        newWorkflow = Workflow(spec="sp00%i" % pairIndex,
                               owner=owner,
                               name="wf00%i" % pairIndex,
                               task="task%i" % taskIndex)
        newWorkflow.create()
        workflows.append(newWorkflow)

    # A single fileset is shared by every subscription.
    sharedFileset = Fileset(name="TestFileset")
    sharedFileset.create()

    # One subscription per workflow row, in the same order.
    subscriptions = []
    for workflow in workflows:
        newSubscription = Subscription(fileset=sharedFileset,
                                       workflow=workflow)
        newSubscription.create()
        subscriptions.append(newSubscription)

    thisThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=thisThread.logger,
                            dbinterface=thisThread.dbi)
    getFinishedDAO = daoFactory(classname="Workflow.GetFinishedWorkflows")

    # Nothing has been marked finished yet.
    finished = getFinishedDAO.execute()
    self.assertEqual(
        len(finished), 0,
        "A workflow is incorrectly flagged as finished: %s" % str(finished))

    # Finish the first 50 subscriptions: five tasks of every workflow name,
    # so no name has all ten of its subscriptions completed.
    for subscription in subscriptions[:50]:
        subscription.markFinished()

    finished = getFinishedDAO.execute()
    self.assertEqual(
        len(finished), 0,
        "A workflow is incorrectly flagged as finished: %s" % str(finished))

    # Finish the remaining subscriptions of wf000 to wf005 only.
    for position, subscription in enumerate(subscriptions):
        if position >= 50 and position % 10 <= 5:
            subscription.markFinished()

    finished = getFinishedDAO.execute()
    self.assertEqual(
        len(finished), 6,
        "A workflow is incorrectly flagged as finished: %s" % str(finished))

    # Sanity checks on the structure of the result. They are very specific
    # and depend heavily on the names of task, spec and workflow used above.
    for workflowName in finished:
        details = finished[workflowName]
        self.assertEqual(
            workflowName[2:], details['spec'][2:],
            "A workflow has the wrong spec-name combination: %s"
            % str(workflowName))
        self.assertTrue(
            int(workflowName[2:]) < 6,
            "A workflow is incorrectly flagged as finished: %s"
            % str(workflowName))
        self.assertEqual(
            len(details['workflows']), 10,
            "A workflow has more tasks than it should: %s" % str(details))
        for taskName in details['workflows']:
            self.assertEqual(
                len(details['workflows'][taskName]), 1,
                "A workflow has more subscriptions than it should: %s"
                % str(details))
class ParentlessMergeBySizeTest(unittest.TestCase):
    """
    _ParentlessMergeBySizeTest_

    Unit tests for parentless WMBS merging.
    """

    def setUp(self):
        """
        _setUp_

        Boiler plate DB setup.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        # current_thread() replaces the deprecated currentThread() alias.
        myThread = threading.current_thread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert a location, two filesets, a merge workflow (marked as
        injected), a "ParentlessMergeBySize" subscription and eleven files
        into WMBS to test job creation.

        The files cover run 1 / lumi 45 (file1-file4), run 1 / lumi 46
        (fileA-fileC) and run 2 / lumi 46 (fileI-fileIV).  All are 1024
        bytes and 1024 events, except fileIII (102400 events) and fileIV
        (102400 bytes).  Every file is added to both the merge fileset and
        the bogus fileset.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=True)

        self.mergeSubscription = Subscription(
            fileset=self.mergeFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")
        self.mergeSubscription.create()
        # NOTE(review): the bogus subscription is built but never
        # create()d -- confirm that this is intentional.
        self.bogusSubscription = Subscription(
            fileset=self.bogusFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")

        # (lfn, size, events, first_event, run, lumi)
        fileSpecs = [
            ("file1", 1024, 1024, 0, 1, 45),
            ("file2", 1024, 1024, 1024, 1, 45),
            ("file3", 1024, 1024, 2048, 1, 45),
            ("file4", 1024, 1024, 3072, 1, 45),
            ("fileA", 1024, 1024, 0, 1, 46),
            ("fileB", 1024, 1024, 1024, 1, 46),
            ("fileC", 1024, 1024, 2048, 1, 46),
            ("fileI", 1024, 1024, 0, 2, 46),
            ("fileII", 1024, 1024, 1024, 2, 46),
            ("fileIII", 1024, 102400, 2048, 2, 46),
            ("fileIV", 102400, 1024, 3072, 2, 46),
        ]

        # Create every file first, then attach them to the filesets.
        jobFiles = []
        for lfn, size, events, firstEvent, run, lumi in fileSpecs:
            newFile = File(lfn=lfn, size=size, events=events,
                           first_event=firstEvent,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.addRun(Run(run, lumi))
            newFile.create()
            jobFiles.append(newFile)

        for jobFile in jobFiles:
            self.mergeFileset.addFile(jobFile)
            self.bogusFileset.addFile(jobFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()
        return

    def _splitMergeSubscription(self, **splitterArgs):
        """
        __splitMergeSubscription_

        Run the job splitter over self.mergeSubscription with the given
        splitting parameters and return the resulting job groups.
        stuffWMBS() must have been called beforehand.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)
        return jobFactory(**splitterArgs)

    def _assertFilesOrdered(self, jobFiles):
        """
        __assertFilesOrdered_

        Assert that the files are sorted by run, then by lowest lumi within
        the same run, then by first event within the same lumi.  For each
        file only the first run obtained by iterating its "runs" is used.
        """
        previous = None
        for jobFile in jobFiles:
            firstRun = list(jobFile["runs"])[0]
            fileRun = firstRun.run
            fileLumi = min(firstRun)
            fileEvent = jobFile["first_event"]

            if previous is not None:
                prevRun, prevLumi, prevEvent = previous
                self.assertTrue(
                    fileRun >= prevRun,
                    "ERROR: Files not sorted by run: %s, %s"
                    % (fileRun, prevRun))
                if fileRun == prevRun:
                    self.assertTrue(fileLumi >= prevLumi,
                                    "ERROR: Files not ordered by lumi")
                    if fileLumi == prevLumi:
                        self.assertTrue(
                            fileEvent >= prevEvent,
                            "ERROR: Files not ordered by first event")

            previous = (fileRun, fileLumi, fileEvent)
        return

    def _assertJobsMatchGolden(self, jobs, expectations):
        """
        __assertJobsMatchGolden_

        Check every job against a list of (goldenFiles, diskUsage) pairs.

        The pair whose goldenFiles list contains the job's first LFN is
        selected.  The job must then carry that estimated disk usage,
        contain only LFNs from that list, and have its files correctly
        ordered.  Matched LFNs are removed from the golden lists, which
        must all be empty once every job has been checked.
        """
        for job in jobs:
            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()
            firstLFN = jobFiles[0]["lfn"]
            for goldenFiles, diskUsage in expectations:
                if firstLFN in goldenFiles:
                    break
            else:
                # Fail explicitly instead of reusing a stale/unbound list.
                self.fail("Error: Unknown file: %s" % firstLFN)

            self.assertEqual(job["estimatedDiskUsage"], diskUsage)

            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file: %s" % jobFile["lfn"])
                goldenFiles.remove(jobFile["lfn"])

            self._assertFilesOrdered(jobFiles)

        for goldenFiles, _ in expectations:
            self.assertEqual(len(goldenFiles), 0,
                             "ERROR: Files missing from merge jobs.")
        return

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Set the minimum merge size to be 200,000 bytes which is more than
        the sum of all file sizes in the WMBS instance.  Verify that no
        merge jobs will be produced.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=200000,
                                              max_merge_size=2000000000,
                                              max_merge_events=200000000)

        self.assertEqual(len(result), 0,
                         "ERROR: No job groups should be returned.")
        return

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to be 200,000 bytes which is more than
        the sum of all file sizes in the WMBS instance and mark the fileset
        as closed.  Verify that one job containing all files is pushed out.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        result = self._splitMergeSubscription(min_merge_size=200000,
                                              max_merge_size=2000000,
                                              max_merge_events=2000000)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % len(result))
        self.assertEqual(
            len(result[0].jobs), 1,
            "Error: One job should have been returned: %s"
            % len(result[0].jobs))

        self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"],
                         10 + 2 * 100)
        self.assertEqual(result[0].jobs[0]["possiblePSN"],
                         set(["T1_US_FNAL"]))

        goldenFiles = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII", "fileIV"
        ]

        jobFiles = result[0].jobs[0].getFiles()
        for jobFile in jobFiles:
            # Files must be loaded before their LFN/run data is read.
            jobFile.loadData()
            self.assertTrue(jobFile["lfn"] in goldenFiles,
                            "Error: Unknown file: %s" % jobFile["lfn"])
            goldenFiles.remove(jobFile["lfn"])

        self._assertFilesOrdered(jobFiles)
        return

    def testMaxMergeSize(self):
        """
        _testMaxMergeSize_

        Set the maximum merge size to be 100000 bytes.  Verify that two
        merge jobs are created, one for the one large file and another for
        the rest of the files.  Verify that each merge job contains the
        expected files and that we merge across runs.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=1,
                                              max_merge_size=100000,
                                              max_merge_events=200000)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(len(result[0].jobs), 2,
                         "ERROR: Two jobs should have been returned.")

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII"
        ]
        goldenFilesB = ["fileIV"]

        self._assertJobsMatchGolden(result[0].jobs,
                                    [(goldenFilesA, 11),
                                     (goldenFilesB, 2 * 100)])
        return

    def testMaxEvents(self):
        """
        _testMaxEvents_

        Verify that the max_merge_events parameter works and that we
        correctly merge across runs.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=1,
                                              max_merge_size=20000000,
                                              max_merge_events=100000)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(
            len(result[0].jobs), 2,
            "ERROR: Two jobs should have been returned: %s"
            % len(result[0].jobs))

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIV"
        ]
        goldenFilesB = ["fileIII"]

        self._assertJobsMatchGolden(result[0].jobs,
                                    [(goldenFilesA, 9 + 2 * 100),
                                     (goldenFilesB, 2)])
        return

    def testMinMergeSize1aNoRunMerge(self):
        """
        _testMinMergeSize1aNoRunMerge_

        Set the minimum merge size to be 200,000 bytes which is more than
        the sum of all file sizes in the WMBS instance and mark the fileset
        as closed.  Verify that two jobs are pushed out and that we don't
        merge across run boundaries.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        result = self._splitMergeSubscription(min_merge_size=200000,
                                              max_merge_size=2000000,
                                              max_merge_events=2000000,
                                              merge_across_runs=False)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % len(result))
        self.assertEqual(
            len(result[0].jobs), 2,
            "Error: Two jobs should have been returned: %s"
            % len(result[0].jobs))

        goldenFilesA = sorted([
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ])
        goldenFilesB = sorted(["fileI", "fileII", "fileIII", "fileIV"])

        for job in result[0].jobs:
            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()
            for jobFile in jobFiles:
                # Files must be loaded before their LFN/run data is read.
                jobFile.loadData()
            self._assertFilesOrdered(jobFiles)

            jobLFNs = sorted(jobFile["lfn"] for jobFile in jobFiles)
            if jobLFNs == goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 8)
                # Emptied so a second job cannot match the same list.
                goldenFilesA = []
            else:
                self.assertEqual(job["estimatedDiskUsage"], 3 + 2 * 100)
                self.assertEqual(jobLFNs, goldenFilesB,
                                 "Error: LFNs do not match.")
                goldenFilesB = []
        return

    def testMaxMergeSizeNoRunMerge(self):
        """
        _testMaxMergeSizeNoRunMerge_

        Set the maximum merge size to be 100000 bytes and disable merging
        across runs.  Verify that three merge jobs are created: one for the
        run 1 files, one for the small run 2 files and one for the one
        large file.  Verify that each merge job contains the expected
        files.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=1,
                                              max_merge_size=100000,
                                              max_merge_events=200000,
                                              merge_across_runs=False)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(len(result[0].jobs), 3,
                         "ERROR: Three jobs should have been returned.")

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ]
        goldenFilesB = ["fileI", "fileII", "fileIII"]
        goldenFilesC = ["fileIV"]

        # All three golden lists (including C) must end up empty.
        self._assertJobsMatchGolden(result[0].jobs,
                                    [(goldenFilesA, 8),
                                     (goldenFilesB, 4),
                                     (goldenFilesC, 2 * 100)])
        return

    def testMaxEventsNoRunMerge(self):
        """
        _testMaxEventsNoRunMerge_

        Verify that the max events merge parameter works correctly and
        that we don't merge across run boundaries.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=1,
                                              max_merge_size=20000000,
                                              max_merge_events=100000,
                                              merge_across_runs=False)

        self.assertEqual(
            len(result), 1,
            "ERROR: More than one JobGroup returned: %s" % result)
        self.assertEqual(
            len(result[0].jobs), 3,
            "ERROR: Three jobs should have been returned: %s"
            % len(result[0].jobs))

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
        ]
        goldenFilesB = ["fileI", "fileII", "fileIV"]
        goldenFilesC = ["fileIII"]

        self._assertJobsMatchGolden(result[0].jobs,
                                    [(goldenFilesA, 8),
                                     (goldenFilesB, 2 + 2 * 100),
                                     (goldenFilesC, 2 * 1)])
        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together
        in the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL", size=4098, events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, 46))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        result = self._splitMergeSubscription(min_merge_size=4097,
                                              max_merge_size=99999999,
                                              max_merge_events=999999999,
                                              merge_across_runs=False)

        self.assertEqual(len(result), 1,
                         "ERROR: More than one JobGroup returned.")
        self.assertEqual(len(result[0].jobs), 3,
                         "ERROR: Three jobs should have been returned.")

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)
        return

    def testMaxWaitTime(self):
        """
        _testMaxWaitTime_

        Set the max wait time to be negative - this should force all files
        to merge out immediately.

        Uses the same setup as testMinMergeSize1, which without the wait
        time produces no job groups.
        """
        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=200000,
                                              max_merge_size=2000000000,
                                              max_merge_events=200000000,
                                              max_wait_time=-10)

        # Everything should be in one, small jobGroup
        self.assertEqual(len(result), 1)
        self.assertEqual(len(result[0].jobs), 1)
        job = result[0].jobs[0]
        # All files should be in one job
        self.assertEqual(len(job.getFiles()), 11)
        return

    def testDifferentSubscritionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Make sure that the merge splitting still runs if the subscription
        ID is not equal to the workflow ID.
        """
        myThread = threading.current_thread()
        myThread.transaction.begin()

        # Two extra subscriptions on one dummy workflow are created first,
        # presumably so the IDs handed out by stuffWMBS() differ between
        # workflow and subscription.
        dummyWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                                 owner="Steve", task="Test2")
        dummyWorkflow.create()
        dummyFileset = Fileset(name="dummyFileset")
        dummyFileset.create()
        dummySubscription1 = Subscription(
            fileset=dummyFileset,
            workflow=dummyWorkflow,
            split_algo="ParentlessMergeBySize")
        dummySubscription2 = Subscription(
            fileset=dummyFileset,
            workflow=dummyWorkflow,
            split_algo="ParentlessMergeBySize")
        dummySubscription1.create()
        dummySubscription2.create()
        myThread.transaction.commit()

        self.stuffWMBS()

        result = self._splitMergeSubscription(min_merge_size=4097,
                                              max_merge_size=99999999,
                                              max_merge_events=999999999,
                                              merge_across_runs=False)

        self.assertEqual(len(result), 1)
        jobGroup = result[0]
        self.assertEqual(len(jobGroup.jobs), 2)
        return
def injectJobs(self):
    """
    _injectJobs_

    Inject two workflows into WMBS and save the job objects to disk.

    Ten input files are created.  For each of them one job is created in
    job group A (site T1_US_FNAL) and one in job group B (site T1_UK_RAL).
    Every job gets its own cache directory under self.testDir, is pickled
    there as "job.pkl" and is moved from state "new" to "created".
    """
    testWorkflowA = Workflow(spec="specA.pkl", owner="Steve",
                             name="wf001", task="TestTaskA")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec="specB.pkl", owner="Steve",
                             name="wf002", task="TestTaskB")
    testWorkflowB.create()

    testFileset = Fileset("testFileset")
    testFileset.create()

    testSubA = Subscription(fileset=testFileset, workflow=testWorkflowA)
    testSubA.create()
    testSubB = Subscription(fileset=testFileset, workflow=testWorkflowB)
    testSubB.create()

    testGroupA = JobGroup(subscription=testSubA)
    testGroupA.create()
    testGroupB = JobGroup(subscription=testSubB)
    testGroupB.create()

    stateChanger = ChangeState(self.createConfig(), "jobsubmittercaching_t")

    # (job label, job group, site the job may run at)
    jobFlavours = [("A", testGroupA, "T1_US_FNAL"),
                   ("B", testGroupB, "T1_UK_RAL")]

    for i in range(10):
        newFile = File(lfn="testFile%s" % i,
                       locations=set(["se.T1_US_FNAL", "se.T1_UK_RAL"]))
        newFile.create()

        for label, jobGroup, site in jobFlavours:
            newJob = Job(name="testJob%s-%s" % (label, i), files=[newFile])
            # NOTE(review): both A and B jobs are tagged with "wf001",
            # although group B belongs to workflow "wf002" -- confirm.
            newJob["workflow"] = "wf001"
            newJob["possiblePSN"] = [site]
            newJob["sandbox"] = "%s/somesandbox" % self.testDir
            newJob["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir,
                                       "job%s-%s" % (label, i))
            os.mkdir(jobCacheDir)
            newJob["cache_dir"] = jobCacheDir
            newJob["type"] = "Processing"
            newJob.create(jobGroup)

            # pickle writes bytes, so the file must be opened in binary
            # mode; the context manager closes it even if dump() raises.
            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJob, jobHandle)

            stateChanger.propagate([newJob], "created", "new")

    return
def testC_ACDCTest(self):
    """
    _ACDCTest_

    Test whether we can get a goodRunList out of ACDC and process it
    correctly.

    Nine jobs over ten files are built, three of the jobs are reported to
    ACDC as failed, and a LumiBased split over all ten files is then
    expected to produce jobs covering only the lumis of the failed jobs.
    """
    workload = self.createTestWorkload()
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database=self.testInit.couchDbName)

    # One entry per job (A to I).  Each entry lists, for every input file
    # of that job, the arguments handed to Run() -- presumably the run
    # number followed by its lumis.
    runArgsPerJob = [
        [(1, 1, 2), (1, 3)],    # job A: files A and B
        [(1, 4, 6)],            # job B: file C
        [(1, 7)],               # job C: file D
        [(1, 11, 12)],          # job D: file E
        [(2, 5, 6, 7)],         # job E: file F
        [(2, 10, 11, 12)],      # job F: file G
        [(2, 15)],              # job G: file H
        [(3, 20)],              # job H: file I
        [(1, 9)],               # job I: file J
    ]

    inputFiles = []
    jobs = []
    for runArgsList in runArgsPerJob:
        # Create the job's files first, then the job itself.
        filesForJob = []
        for runArgs in runArgsList:
            inputFile = File(lfn=makeUUID(), size=1024, events=1024,
                             locations="T1_US_FNAL_Disk")
            inputFile.addRun(Run(*runArgs))
            inputFile.create()
            filesForJob.append(inputFile)

        newJob = getJob(workload)
        for inputFile in filesForJob:
            newJob.addFile(inputFile)

        inputFiles.extend(filesForJob)
        jobs.append(newJob)

    # Only jobs A, D and H are reported as failed.
    dcs.failedJobs([jobs[0], jobs[3], jobs[7]])

    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for inputFile in inputFiles:
        testFileset.addFile(inputFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                   subscription=testSubscription)

    splitArgs = {
        "lumis_per_job": 100,
        "halt_job_on_file_boundaries": False,
        "splitOnRun": True,
        "collectionName": workload.name(),
        "filesetName": workload.getTask("reco").getPathName(),
        "owner": "evansde77",
        "group": "DMWM",
        "couchURL": self.testInit.couchUrl,
        "couchDB": self.testInit.couchDbName,
        "performance": self.performanceParams,
    }
    jobGroups = jobFactory(**splitArgs)

    createdJobs = jobGroups[0].jobs
    # Run 1 lumis come from failed jobs A and D, run 3 from failed job H.
    self.assertEqual(createdJobs[0]['mask'].getRunAndLumis(),
                     {1: [[1, 2], [3, 3], [11, 12]]})
    self.assertEqual(createdJobs[1]['mask'].getRunAndLumis(),
                     {3: [[20, 20]]})

    return
def testParallelProcessing(self):
    """
    _testParallelProcessing_

    Verify that merging works correctly when multiple processing
    subscriptions are run over the same input files.  The merging
    algorithm should ignore processing jobs that feed into different
    merge subscriptions.
    """

    def makeCleanoutJob(inputFile, jobGroup, outcome):
        # Create a job over inputFile in jobGroup and save it in the
        # "cleanout" state with the given outcome.
        newJob = Job()
        newJob.addFile(inputFile)
        newJob.create(jobGroup)
        newJob["state"] = "cleanout"
        newJob["oldstate"] = "new"
        newJob["couch_record"] = "somejive"
        newJob["retry_count"] = 0
        newJob["outcome"] = outcome
        newJob.save()
        return newJob

    def makeOutputFile(lfn, parentFile):
        # Create a 1024 byte / 1024 event file at T2_CH_CERN on run 1,
        # lumi 45, whose parent is parentFile.
        newFile = File(lfn=lfn, size=1024, events=1024, first_event=0,
                       locations={"T2_CH_CERN"})
        newFile.addRun(Run(1, 45))
        newFile.create()
        newFile.addParent(parentFile["lfn"])
        return newFile

    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
    newLocation.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

    mergeFilesetA = Fileset(name="mergeFilesetA")
    mergeFilesetB = Fileset(name="mergeFilesetB")
    mergeFilesetA.create()
    mergeFilesetB.create()

    mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
    mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
    mergeMergedFilesetA.create()
    mergeMergedFilesetB.create()

    mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                             owner="Steve", task="Test")
    mergeWorkflow.create()

    mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                      workflow=mergeWorkflow,
                                      split_algo="WMBSMergeBySize")
    mergeSubscriptionA.create()
    mergeSubscriptionB.create()

    inputFileset = Fileset(name="inputFileset")
    inputFileset.create()

    inputFileA = File(lfn="inputLFNA")
    inputFileB = File(lfn="inputLFNB")
    inputFileA.create()
    inputFileB.create()

    # Two processing workflows read the same input fileset but write to
    # different merge filesets.
    procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                             owner="Steve", task="Test")
    procWorkflowA.create()
    procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
    procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                             owner="Steve", task="Test2")
    procWorkflowB.create()
    procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

    procSubscriptionA = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowA,
                                     split_algo="EventBased")
    procSubscriptionA.create()
    procSubscriptionB = Subscription(fileset=inputFileset,
                                     workflow=procWorkflowB,
                                     split_algo="EventBased")
    procSubscriptionB.create()

    jobGroupA = JobGroup(subscription=procSubscriptionA)
    jobGroupA.create()
    jobGroupB = JobGroup(subscription=procSubscriptionB)
    jobGroupB.create()

    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    # Group A: one successful job per input file.
    testJobA = makeCleanoutJob(inputFileA, jobGroupA, "success")
    testJobB = makeCleanoutJob(inputFileB, jobGroupA, "success")
    # Group B: one successful and one failed job per input file.
    testJobC = makeCleanoutJob(inputFileA, jobGroupB, "success")
    testJobD = makeCleanoutJob(inputFileA, jobGroupB, "failure")
    testJobE = makeCleanoutJob(inputFileB, jobGroupB, "success")
    testJobF = makeCleanoutJob(inputFileB, jobGroupB, "failure")

    changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                            testJobE, testJobF])

    fileA = makeOutputFile("fileA", inputFileA)
    fileB = makeOutputFile("fileB", inputFileB)

    for outputFile in (fileA, fileB):
        jobGroupA.output.addFile(outputFile)
    jobGroupA.output.commit()

    for outputFile in (fileA, fileB):
        mergeFilesetA.addFile(outputFile)
    mergeFilesetA.commit()

    fileC = makeOutputFile("fileC", inputFileA)
    fileD = makeOutputFile("fileD", inputFileB)

    # Group B's output fileset is filled without an explicit commit here.
    for outputFile in (fileC, fileD):
        jobGroupB.output.addFile(outputFile)

    for outputFile in (fileC, fileD):
        mergeFilesetB.addFile(outputFile)
    mergeFilesetB.commit()

    jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                   subscription=mergeSubscriptionB)

    # While jobs D and F are recorded as failed, nothing may be merged
    # for subscription B.
    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 0, \
        "Error: No merge jobs should have been created."

    fileE = makeOutputFile("fileE", inputFileA)
    fileF = makeOutputFile("fileF", inputFileB)

    for outputFile in (fileE, fileF):
        jobGroupB.output.addFile(outputFile)

    for outputFile in (fileE, fileF):
        mergeFilesetB.addFile(outputFile)
    mergeFilesetB.commit()

    # Flip the two failed jobs to success and split again.
    testJobD["outcome"] = "success"
    testJobD.save()
    testJobF["outcome"] = "success"
    testJobF.save()

    changeStateDAO.execute([testJobD, testJobF])

    result = jobFactory(min_merge_size=1, max_merge_size=20000,
                        max_merge_events=7169)

    assert len(result) == 1, \
        "Error: One merge job should have been created: %s" % len(result)

    return
def testOutput(self):
    """
    _testOutput_

    Create a workflow and add some outputs to it.  Verify that these are
    stored to and loaded from the database correctly.

    Two (output, merged output) fileset pairs are registered under the
    "outModOne" output module and one pair under "outModTwo".  The workflow
    is then loaded again by name and task, and its outputMap is compared
    against the IDs of the filesets that were registered.
    """
    mappingError = "Error: Output map incorrectly maps filesets."

    testFilesetA = Fileset(name="testFilesetA")
    testMergedFilesetA = Fileset(name="testMergedFilesetA")
    testFilesetB = Fileset(name="testFilesetB")
    testMergedFilesetB = Fileset(name="testMergedFilesetB")
    testFilesetC = Fileset(name="testFilesetC")
    testMergedFilesetC = Fileset(name="testMergedFilesetC")
    testFilesetA.create()
    testFilesetB.create()
    testFilesetC.create()
    testMergedFilesetA.create()
    testMergedFilesetB.create()
    testMergedFilesetC.create()

    testWorkflowA = Workflow(spec="spec.xml", owner="Simon",
                             name="wf001", task='Test')
    testWorkflowA.create()

    # Load a second handle on the same workflow before any output has been
    # added: its output map must be empty.
    testWorkflowB = Workflow(name="wf001", task='Test')
    testWorkflowB.load()

    self.assertEqual(len(testWorkflowB.outputMap), 0,
                     "ERROR: Output map exists before output is assigned")

    # "outModOne" deliberately gets two fileset pairs, "outModTwo" one.
    testWorkflowA.addOutput("outModOne", testFilesetA, testMergedFilesetA)
    testWorkflowA.addOutput("outModOne", testFilesetC, testMergedFilesetC)
    testWorkflowA.addOutput("outModTwo", testFilesetB, testMergedFilesetB)

    # A fresh handle loaded after the outputs were added must see them all.
    testWorkflowC = Workflow(name="wf001", task='Test')
    testWorkflowC.load()

    self.assertEqual(len(testWorkflowC.outputMap), 2,
                     "ERROR: Incorrect number of outputs in output map")
    self.assertIn("outModOne", testWorkflowC.outputMap,
                  "ERROR: Output modules missing from workflow output map")
    self.assertIn("outModTwo", testWorkflowC.outputMap,
                  "ERROR: Output modules missing from workflow output map")

    # Without this check the loop below would pass vacuously if the list
    # were empty or contained only one of the two registered mappings.
    outModOneMaps = testWorkflowC.outputMap["outModOne"]
    self.assertEqual(len(outModOneMaps), 2,
                     "ERROR: Incorrect number of output maps for outModOne")

    # The order of the two "outModOne" entries is not assumed: each entry is
    # identified by its unmerged fileset and checked against its partner.
    for outputMap in outModOneMaps:
        if outputMap["output_fileset"].id == testFilesetA.id:
            self.assertEqual(outputMap["merged_output_fileset"].id,
                             testMergedFilesetA.id, mappingError)
        else:
            self.assertEqual(outputMap["merged_output_fileset"].id,
                             testMergedFilesetC.id, mappingError)
            self.assertEqual(outputMap["output_fileset"].id,
                             testFilesetC.id, mappingError)

    outModTwoMap = testWorkflowC.outputMap["outModTwo"][0]
    self.assertEqual(outModTwoMap["merged_output_fileset"].id,
                     testMergedFilesetB.id, mappingError)
    self.assertEqual(outModTwoMap["output_fileset"].id,
                     testFilesetB.id, mappingError)
    return