def testCreateDeleteExistsNoFiles(self):
    """
    _testCreateDeleteExistsNoFiles_

    Create and then delete a job but don't add any input files to it.
    Use the job class's exists() method to determine if the job has been
    written to the database before it is created, after it has been created
    and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job(name="TestJob")

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id on success and False otherwise; a plain
    # ">= 0" comparison would also pass for False, so check explicitly.
    assert testJob.exists() is not False, \
        "ERROR: Job does not exist after it was created"

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    return
def test_AutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Test and see whether we can find and set the auto_increment values
    """
    myThread = threading.currentThread()
    if myThread.dialect.lower() != "mysql":
        return

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
    incrementDAO.execute()

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 1)

    incrementDAO.execute()

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 2)

    incrementDAO.execute(input=10)

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 11)

    incrementDAO.execute(input=5)

    testJob = Job()
    testJob.create(group=testJobGroup)
    self.assertEqual(testJob.exists(), 12)

    return
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create a JobGroup and then delete it.  Use the JobGroup's exists()
    method to determine if it exists before it is created, after it is
    created and after it is deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = WMBSFileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)

    self.assertFalse(testJobGroup.exists())
    testJobGroup.create()
    self.assertTrue(testJobGroup.exists())
    testJobGroup.delete()
    self.assertFalse(testJobGroup.exists())

    testSubscription.delete()
    testFileset.delete()
    testWorkflow.delete()
    return
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100,
                       firstLumi=1, lastLumi=10, index=1):
    """
    _generateFakeMCFile_

    Create a fileset holding a single MCFakeFile plus a Production/EventBased
    subscription on top of it, and return the subscription.
    """
    # MC comes with only one MCFakeFile
    singleMCFileset = Fileset(name="MCTestFileset %i" % index)
    singleMCFileset.create()

    newFile = File("MCFakeFileTest %i" % index, size=1000,
                   events=numEvents,
                   locations=set(["somese.cern.ch"]))
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    newFile.create()

    singleMCFileset.addFile(newFile)
    singleMCFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    singleMCFileSubscription = Subscription(fileset=singleMCFileset,
                                            workflow=testWorkflow,
                                            split_algo="EventBased",
                                            type="Production")
    singleMCFileSubscription.create()
    return singleMCFileSubscription
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()
    testWorkflow = Workflow(spec="spec.xml", owner="dmwm",
                            name="testWorkflow_%s" % baseName[:4], task="Test")
    testWorkflow.create()

    testFileset = Fileset(name=baseName)
    for i in range(nFiles):
        newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, 'T1_US_FNAL_Disk')
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T2_CH_CERN')
            testFileset.addFile(newFile)
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def createTestJob(subscriptionType="Merge"):
    """
    _createTestJob_

    Create a test job with two files as input.  This will also create the
    appropriate workflow, jobgroup and subscription.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Simon",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=subscriptionType)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[46]))

    testFileA.create()
    testFileB.create()

    testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
    testJob["couch_record"] = "somecouchrecord"
    testJob["location"] = "test.site.ch"
    testJob.create(group=testJobGroup)
    testJob.associateFiles()
    return testJob
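# Note: the Workflow -> Fileset -> Subscription -> JobGroup boilerplate above is
# repeated in nearly every fixture in this module.  A minimal sketch of a shared
# helper that could absorb it; the helper name and signature are illustrative
# (not part of the WMCore API) and it only uses constructors already exercised
# in these tests:
def makeWMBSBootstrap(spec, owner, workflowName, task="Test", subType="Processing"):
    """Create and return the standard (workflow, fileset, subscription, jobGroup) chain."""
    workflow = Workflow(spec=spec, owner=owner, name=workflowName, task=task)
    workflow.create()
    fileset = Fileset(name="TestFileset")
    fileset.create()
    subscription = Subscription(fileset=fileset, workflow=workflow, type=subType)
    subscription.create()
    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()
    return workflow, fileset, subscription, jobGroup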
def testAddToFileset(self):
    """
    _AddToFileset_

    Test to see if we can add to a fileset using the DAO
    """
    testFileset = Fileset(name="inputFileset")
    testFileset.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    addToFileset = self.daofactory(classname="Files.AddToFileset")
    addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                         fileset=testFileset.id)

    testFileset2 = Fileset(name="inputFileset")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 2)
    for fileObj in testFileset2.files:
        self.assertTrue(fileObj in [testFileA, testFileB])

    # Check that adding twice doesn't crash
    addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                         fileset=testFileset.id)
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Create a JobGroup and commit it to the database.  Rollback the database
    transaction and verify that the JobGroup is no longer in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = WMBSFileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)

    assert testJobGroup.exists() is False, \
        "ERROR: Job group exists before it was created"

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJobGroup.create()

    # exists() returns the job group id on success and False otherwise; a
    # plain ">= 0" comparison would also pass for False, so check explicitly.
    assert testJobGroup.exists() is not False, \
        "ERROR: Job group does not exist after it was created"

    myThread.transaction.rollback()

    assert testJobGroup.exists() is False, \
        "ERROR: Job group exists after transaction was rolled back."

    testSubscription.delete()
    testFileset.delete()
    testWorkflow.delete()
    return
def createWorkflow(self, task):
    """
    Register job into WMBS for each task through Workflows
    """
    specURL = self.getWorkflowURL(task)

    fileSet = Fileset(name=self.getFilesetName(task), is_open=True)
    fileSet.create()

    taskFlow = Workflow(spec=specURL, owner=self.owner, dn=self.owner_dn,
                        name=self.getWorkflowName(task), task=task.name())
    taskFlow.create()

    self.workflowDict[task.name()] = taskFlow

    # Insert workflow into task
    setattr(task.data.input.WMBS, 'WorkflowSpecURL', specURL)

    # If the job is a merge job
    # Find the task it merges from
    # Then find the workflow for that task and assign it an output
    if hasattr(task.inputReference(), 'outputModule'):
        dummyStepName = task.inputReference().inputStep.split('/')[-1]
        taskName = task.inputReference().inputStep.split('/')[-2]
        outputModule = task.inputReference().outputModule
        if taskName not in self.workflowDict:
            raise Exception('I am being asked to chain output for a task %s '
                            'which does not yet exist' % taskName)
        outputWorkflow = self.workflowDict[taskName]
        outputWorkflow.addOutput(outputModule, fileSet)

    logging.info('Registered workflow for step %s', task.name())

    return taskFlow, fileSet
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Test the ListRunningJobs DAO.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Steve",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJobA = Job(name=makeUUID(), files=[])
    testJobA["couch_record"] = makeUUID()
    testJobA.create(group=testJobGroup)
    testJobA["state"] = "executing"

    testJobB = Job(name=makeUUID(), files=[])
    testJobB["couch_record"] = makeUUID()
    testJobB.create(group=testJobGroup)
    testJobB["state"] = "complete"

    testJobC = Job(name=makeUUID(), files=[])
    testJobC["couch_record"] = makeUUID()
    testJobC.create(group=testJobGroup)
    testJobC["state"] = "new"

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC])

    runningJobsAction = self.daoFactory(classname="Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()

    assert len(runningJobs) == 2, \
        "Error: Wrong number of running jobs returned."

    for runningJob in runningJobs:
        if runningJob["job_name"] == testJobA["name"]:
            assert runningJob["state"] == testJobA["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobA["couch_record"], \
                "Error: Running job has wrong couch record."
        else:
            assert runningJob["job_name"] == testJobC["name"], \
                "Error: Running job has wrong name."
            assert runningJob["state"] == testJobC["state"], \
                "Error: Running job has wrong state."
            assert runningJob["couch_record"] == testJobC["couch_record"], \
                "Error: Running job has wrong couch record."

    return
def createCommonFileset():
    """
    Create a simple fileset with 4 files at the same location
    """
    multipleFilesFileset = Fileset(name="TestFileset")

    newFile = File("/some/file/test1", size=1000, events=100)
    newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
    newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test2", size=2000, events=200)
    newFile.addRun(Run(3, *[2, 8]))
    newFile.addRun(Run(4, *[3, 8]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test3", size=3000, events=300)
    newFile.addRun(Run(5, *[10, 11, 12]))
    newFile.addRun(Run(6, *[10, 11, 12]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test4", size=4000, events=400)
    newFile.addRun(Run(2, *[3, 8, 9]))
    newFile.addRun(Run(3, *[3, 4, 5, 6]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    multipleFilesFileset.create()
    return multipleFilesFileset
def testMask(self):
    """
    _testMask_

    Test the new mask setup
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()
    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job()
    testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
    testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
    testJob.create(group=testJobGroup)

    loadJob = Job(id=testJob.exists())
    loadJob.loadData()

    runs = loadJob['mask'].getRunAndLumis()
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[100], [[101, 102]])
    self.assertEqual(runs[200], [[201, 202]])

    bigRun = Run(100, *[101, 102, 103, 104])
    badRun = Run(300, *[1001, 1002])
    result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(result), 1)
    alteredRun = result.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    # Filter runs that fall entirely outside the mask; the result is not
    # checked, this just exercises the call (it must not raise).
    run0 = Run(300, *[1001, 1002])
    run1 = Run(300, *[1001, 1002])
    loadJob['mask'].filterRunLumisByMask([run0, run1])

    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for i in range(nFiles):
        newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, "somese.cern.ch")
        newFile.create()
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, "otherse.cern.ch")
            newFile.create()
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def testDifferentSubscriptionIDs(self):
    """
    _testDifferentSubscriptionIDs_

    Make sure that the merge splitting still runs if the subscription ID
    is not equal to the workflow ID.
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()
    dummyWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                             owner="Steve", task="Test2")
    dummyWorkflow.create()
    dummyFileset = Fileset(name="dummyFileset")
    dummyFileset.create()
    dummySubscription1 = Subscription(fileset=dummyFileset,
                                      workflow=dummyWorkflow,
                                      split_algo="ParentlessMergeBySize")
    dummySubscription2 = Subscription(fileset=dummyFileset,
                                      workflow=dummyWorkflow,
                                      split_algo="ParentlessMergeBySize")
    dummySubscription1.create()
    dummySubscription2.create()
    myThread.transaction.commit()

    self.stuffWMBS()
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)
    result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                        max_merge_events=999999999, merge_across_runs=False)
    self.assertEqual(len(result), 1)
    jobGroup = result[0]
    self.assertEqual(len(jobGroup.jobs), 2)
    return
def testFilesWithoutOtherSubscriptions(self):
    """
    _testFilesWithoutOtherSubscriptions_

    Test the case where files only in the delete subscription
    can happen if cleanup of the other subscriptions is fast
    """
    testWorkflowA = Workflow(spec="specA.xml", owner="Steve",
                             name="wfA", task="Test")
    testWorkflowA.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    allFiles = []
    for i in range(500):
        testFile = File(str(i), size=1000, events=100,
                        locations=set(["somese.cern.ch"]))
        testFile.create()
        allFiles.append(testFile)
        testFileset.addFile(testFile)
    testFileset.commit()

    testSubscriptionA = Subscription(fileset=testFileset,
                                     workflow=testWorkflowA,
                                     split_algo="SiblingProcessingBased",
                                     type="Processing")
    testSubscriptionA.create()

    splitter = SplitterFactory()
    deleteFactoryA = splitter(package="WMCore.WMBS",
                              subscription=testSubscriptionA)

    result = deleteFactoryA(files_per_job=50)
    self.assertEqual(len(result), 1,
                     "Error: Wrong number of job groups returned.")
    self.assertEqual(len(result[0].jobs), 10,
                     "Error: Wrong number of jobs returned.")

    return
def createSingleJobWorkflow(self):
    """
    Create a workflow with one job and two files and store the results
    in instance variables
    """
    self.testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                 name="wf001", task="Test")
    self.testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=self.testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    self.testFileA.addRun(Run(1, *[45]))
    self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    self.testFileB.addRun(Run(1, *[46]))

    self.testFileA.create()
    self.testFileB.create()

    self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB])
    self.testJob.create(group=testJobGroup)
    self.testJob.associateFiles()
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
    """
    _createTestSubscription_

    Create a set of test subscriptions for testing purposes.
    """
    if nSites > self.nSites:
        nSites = self.nSites

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    # Create a testWorkflow
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    # Create the files for each site
    for s in range(nSites):
        for i in range(nFiles):
            newFile = File(makeUUID(), size=1024, events=100,
                           locations=set(["site%i.cern.ch" % s]))
            newFile.create()
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="MinFileBased",
                                    type="Processing")
    testSubscription.create()

    # Close the fileset
    if closeFileset:
        testFileset.markOpen(isOpen=False)

    return testSubscription
def createLargeFileBlock(self):
    """
    _createLargeFileBlock_

    Creates a large group of files for testing
    """
    testFileset = Fileset(name="TestFilesetX")
    testFileset.create()
    for _ in range(5000):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        testFileset.addFile(newFile)
    testFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="mnorman",
                            name="wf003", task="Test")
    testWorkflow.create()

    largeSubscription = Subscription(fileset=testFileset,
                                     workflow=testWorkflow,
                                     split_algo="FileBased",
                                     type="Processing")
    largeSubscription.create()

    return largeSubscription
def testD_NonContinuousLumis(self):
    """
    _NonContinuousLumis_

    Test and see if LumiBased can work when the lumis are non-continuous
    """
    baseName = makeUUID()
    nFiles = 10

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for i in range(nFiles):
        newFile = File(lfn='%s_%i' % (baseName, i), size=1000,
                       events=100, locations="somese.cern.ch")
        # Set to two non-continuous lumi numbers
        lumis = [100 + i, 200 + i]
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    jobGroups = jobFactory(lumis_per_job=2,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           performance=self.performanceParams)
    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 10)
    for j in jobs:
        runs = j['mask'].getRunAndLumis()
        for r in runs.keys():
            self.assertEqual(len(runs[r]), 2)
            for lumiPair in runs[r]:
                # Each run should have two lumis
                # Each lumi should be of form [x, x]
                # meaning that the first and last lumis are the same
                self.assertEqual(len(lumiPair), 2)
                self.assertEqual(lumiPair[0], lumiPair[1])
        self.assertEqual(j['estimatedJobTime'], 100 * 12)
        self.assertEqual(j['estimatedDiskUsage'], 100 * 400)
        self.assertEqual(j['estimatedMemoryUsage'], 2300)

    return
def testAddDupsToFilesetBulk(self):
    """
    _AddToDupsFilesetBulk_

    Same as testAddDupsToFileset() but faster
    """
    testWorkflowA = Workflow(spec='hello', owner="mnorman",
                             name="wf001", task="basicWorkload/Production")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec='hello', owner="mnorman",
                             name="wf001", task="basicWorkload/Production2")
    testWorkflowB.create()

    testFilesetA = Fileset(name="inputFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name="inputFilesetB")
    testFilesetB.create()

    testSubscriptionA = Subscription(workflow=testWorkflowA, fileset=testFilesetA)
    testSubscriptionA.create()
    testSubscriptionB = Subscription(workflow=testWorkflowB, fileset=testFilesetB)
    testSubscriptionB.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     locations=['SiteA'])
    testFileA.addRun(Run(1, *[45]))
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10,
                     locations=['SiteB'])
    testFileB.addRun(Run(1, *[45]))

    addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                         [testFileA, testFileB],
                         conn=testFileA.getDBConn(),
                         transaction=testFileA.existingTransaction())

    testFileset2 = Fileset(name="inputFilesetA")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 2)
    for fileObj in testFileset2.files:
        self.assertTrue(fileObj in [testFileA, testFileB])

    # Check that adding twice doesn't crash
    addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                         [testFileA, testFileB],
                         conn=testFileA.getDBConn(),
                         transaction=testFileA.existingTransaction())

    # Files should not get added to fileset B because fileset A is associated
    # with wf001.
    addFilesToWMBSInBulk(testFilesetB.id, "wf001",
                         [testFileA, testFileB],
                         conn=testFileA.getDBConn(),
                         transaction=testFileA.existingTransaction())

    testFileset2 = Fileset(name="inputFilesetB")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 0)

    return
def testAddDupsToFileset(self):
    """
    _AddToDupsFileset_

    Verify that the dups version of the AddToFileset DAO will not add files
    to a fileset if they're already associated to another fileset with the
    same workflow.
    """
    testWorkflowA = Workflow(spec='hello', owner="mnorman",
                             name="wf001", task="basicWorkload/Production")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec='hello', owner="mnorman",
                             name="wf001", task="basicWorkload/Production2")
    testWorkflowB.create()

    testFilesetA = Fileset(name="inputFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name="inputFilesetB")
    testFilesetB.create()

    testSubscriptionA = Subscription(workflow=testWorkflowA, fileset=testFilesetA)
    testSubscriptionA.create()
    testSubscriptionB = Subscription(workflow=testWorkflowB, fileset=testFilesetB)
    testSubscriptionB.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
    addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                         fileset=testFilesetA.id, workflow="wf001")

    testFileset2 = Fileset(name="inputFilesetA")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 2)
    for fileObj in testFileset2.files:
        self.assertTrue(fileObj in [testFileA, testFileB])

    # Check that adding twice doesn't crash
    addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                         fileset=testFilesetA.id, workflow="wf001")

    # Files should not get added to fileset B because fileset A is associated
    # with wf001.
    addToFileset.execute(file=[testFileA['lfn'], testFileB['lfn']],
                         fileset=testFilesetB.id, workflow="wf001")

    testFileset2 = Fileset(name="inputFilesetB")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 0)

    return
def _createThisSubscription(self, initialCounter=1):
    """
    Private function to create a fileset and subscription with different
    fileset and file names

    :param initialCounter: just a simple integer to be appended to files
    :return: a splitter instance (jobFactory)
    """
    splitter = SplitterFactory()

    # Create 3 files with 100 events per lumi:
    # - file1 with 1 run  of 8 lumis
    # - file2 with 2 runs of 2 lumis each
    # - file3 with 1 run  of 5 lumis
    testFileset = Fileset(name='Fileset%s' % initialCounter)

    fileA = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=800)
    lumiListA = []
    for lumi in range(8):
        lumiListA.append(10 + lumi)
    fileA.addRun(Run(1, *lumiListA))
    fileA.setLocation("T1_US_FNAL_Disk")

    initialCounter = int(initialCounter) + 1
    fileB = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=400)
    lumiListB1 = []
    lumiListB2 = []
    for lumi in range(2):
        lumiListB1.append(20 + lumi)
        lumiListB2.append(30 + lumi)
    fileB.addRun(Run(2, *lumiListB1))
    fileB.addRun(Run(3, *lumiListB2))
    fileB.setLocation("T1_US_FNAL_Disk")

    initialCounter = int(initialCounter) + 1
    fileC = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=500)
    lumiListC = []
    for lumi in range(5):
        lumiListC.append(40 + lumi)
    fileC.addRun(Run(4, *lumiListC))
    fileC.setLocation("T1_US_FNAL_Disk")

    testFileset.addFile(fileA)
    testFileset.addFile(fileB)
    testFileset.addFile(fileC)
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
    testSubscription.create()

    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    return jobFactory
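# Illustrative use of the factory returned above, mirroring how job factories
# are driven elsewhere in this module (the parameter values are arbitrary, not
# taken from any specific test):
#
#     jobFactory = self._createThisSubscription(initialCounter=1)
#     jobGroups = jobFactory(events_per_job=550, performance=self.performanceParams)
#     jobs = jobGroups[0].jobs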
def testParentageByJob(self):
    """
    _testParentageByJob_

    Tests the DAO that assigns parentage by Job
    """
    testWorkflow = Workflow(spec='hello', owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.addFile(testFileParentA)
    testJobA.addFile(testFileParentB)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.SetParentageByJob")
    parentAction.execute(binds={'jobid': testJobA.exists(),
                                'child': testFileA['lfn']})

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        self.assertEqual(parentFile in goldenFiles, True,
                         "ERROR: Unknown parent file")
        goldenFiles.remove(parentFile)

    self.assertEqual(len(goldenFiles), 0,
                     "ERROR: Some parents are missing")
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    taskType='Processing', name=None, wfPrio=1, changeState=None):
    """
    _createJobGroups_

    Creates a series of jobGroups for submissions.
    changeState is an instance of the ChangeState class to make job status changes
    """
    jobGroupList = []

    if name is None:
        name = makeUUID()

    testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                            name=name, task="basicWorkload/Production",
                            priority=wfPrio)
    testWorkflow.create()

    # Create subscriptions
    for _ in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type=taskType,
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(name=name, task=task, nJobs=nJobs,
                       jobGroup=testJobGroup, fileset=testFileset,
                       sub=testSubscription.exists(), site=site)

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    if changeState:
        for group in jobGroupList:
            changeState.propagate(group.jobs, 'created', 'new')

    return jobGroupList
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None,
                    bl=None, wl=None):
    """
    Creates a series of jobGroups for submissions
    """
    # Avoid mutable default arguments for the black/white lists
    bl = bl or []
    wl = wl or []

    jobGroupList = []

    testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                            name=makeUUID(), task="basicWorkload/Production",
                            owner_vogroup="phgroup", owner_vorole="cmsrole")
    testWorkflow.create()

    # Create subscriptions
    for _ in range(nSubs):
        name = makeUUID()

        # Create Fileset, Subscription, jobGroup
        testFileset = Fileset(name=name)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create jobs
        self.makeNJobs(name=name, task=task, nJobs=nJobs,
                       jobGroup=testJobGroup, fileset=testFileset,
                       sub=testSubscription.exists(), site=site,
                       bl=bl, wl=wl)

        testFileset.commit()
        testJobGroup.commit()
        jobGroupList.append(testJobGroup)

    return jobGroupList
def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Create a new job and commit it to the database.  Start a new transaction
    and delete the job from the database.  Verify that the job has been
    deleted.  After that, roll back the transaction and verify that the job
    is once again in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id on success and False otherwise; a plain
    # ">= 0" comparison would also pass for False, so check explicitly.
    assert testJob.exists() is not False, \
        "ERROR: Job does not exist after it was created"

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    myThread.transaction.rollback()

    assert testJob.exists() is not False, \
        "ERROR: Job does not exist after transaction was rolled back."

    return
def stuffWMBS(self):
    """
    _stuffWMBS_

    Stuff WMBS with workflows
    """
    workflow = Workflow(spec='spec.xml', name='ReRecoTest_v0Emulator',
                        task='/ReRecoTest_v0Emulator/Test', priority=10)
    workflow.create()

    inputFileset = Fileset(name='TestFileset')
    inputFileset.create()

    subscription = Subscription(inputFileset, workflow)
    subscription.create()
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % baseName, size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()
    for i in range(nFiles):
        newFile = File(lfn='%s_%i' % (baseName, i), size=1000,
                       events=100, locations="somese.cern.ch")
        lumis = []
        for lumi in range(lumisPerFile):
            if rand:
                lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
            else:
                lumis.append((100 * i) + lumi)
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        newFile.addParent(parentFile['lfn'])
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = File(lfn='%s_%i_2' % (baseName, i), size=1000,
                           events=100, locations="otherse.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def testLargeNumberOfFiles(self):
    """
    _testLargeNumberOfFiles_

    Setup a subscription with 500 files and verify that the splitting algo
    works correctly.
    """
    testWorkflowA = Workflow(spec="specA.xml", owner="Steve",
                             name="wfA", task="Test")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec="specB.xml", owner="Steve",
                             name="wfB", task="Test")
    testWorkflowB.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    allFiles = []
    for i in range(500):
        testFile = File(str(i), size=1000, events=100,
                        locations=set(["T2_CH_CERN"]))
        testFile.create()
        allFiles.append(testFile)
        testFileset.addFile(testFile)
    testFileset.commit()

    testSubscriptionA = Subscription(fileset=testFileset,
                                     workflow=testWorkflowA,
                                     split_algo="FileBased",
                                     type="Processing")
    testSubscriptionA.create()
    testSubscriptionB = Subscription(fileset=testFileset,
                                     workflow=testWorkflowB,
                                     split_algo="SiblingProcessingBased",
                                     type="Processing")
    testSubscriptionB.create()

    testSubscriptionA.completeFiles(allFiles)

    splitter = SplitterFactory()
    deleteFactoryA = splitter(package="WMCore.WMBS",
                              subscription=testSubscriptionB)

    result = deleteFactoryA(files_per_job=50)
    self.assertEqual(len(result), 1,
                     "Error: Wrong number of job groups returned.")
    self.assertEqual(len(result[0].jobs), 10,
                     "Error: Wrong number of jobs returned.")

    return
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create several jobs
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Create a file
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')
    testFileA.create()

    baseName = makeUUID()

    # Now create the jobs
    for i in range(nJobs):
        testJob = Job(name='%s-%i' % (baseName, i))
        testJob.addFile(testFileA)
        testJob['location'] = 'malpaquet'
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob.create(testJobGroup)
        testJob.save()
        testJobGroup.add(testJob)

    testJobGroup.commit()

    # Set test job caches
    for job in testJobGroup.jobs:
        job.setCache(cacheDir)

    return testJobGroup
def testD_HardLimitSplittingOnly(self):
    """
    _testD_HardLimitSplittingOnly_

    Checks that we can split a set of files where every file has a single
    lumi too big to fit in a runnable job
    """
    splitter = SplitterFactory()

    # Create 3 single-big-lumi files
    testFileset = Fileset(name="FilesetA")
    testFileset.create()
    testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "T1_US_FNAL_Disk")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "T1_US_FNAL_Disk")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    # Settings are to split on job boundaries, to fail single lumis with
    # more than 800 events and to put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
    for i in range(1, 4):
        self.assertTrue(jobs[i - 1]['failedOnCreation'],
                        "The job processing file %d should be marked for failure" % i)
        # The expected text must match the splitter's error message verbatim
        error = 'File /this/is/file%s has a single lumi %d, in run %s' % (i, i - 1, i - 1)
        error += ' with too many events 1000 and it woud take 12000 sec to run'
        self.assertEqual(jobs[i - 1]['failedReason'], error)

    return
def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Create a JobGroup and then commit it to the database.  Begin a
    transaction and then delete the JobGroup from the database.  Using the
    exists() method verify that the JobGroup is not in the database.
    Finally, roll back the transaction and verify that the JobGroup is in
    the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = WMBSFileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)

    self.assertFalse(testJobGroup.exists())
    testJobGroup.create()
    self.assertTrue(testJobGroup.exists())

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJobGroup.delete()
    self.assertFalse(testJobGroup.exists())

    myThread.transaction.rollback()
    self.assertTrue(testJobGroup.exists())

    testSubscription.delete()
    testFileset.delete()
    testWorkflow.delete()
    return
def createTestJobGroup(self, nJobs, subType="Processing", retryOnce=False):
    """
    _createTestJobGroup_

    Creates a group of several jobs
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=subType)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    # this previously re-set testFileA's location; it should locate testFileB
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    for _ in range(0, nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)

    testJobGroup.commit()

    if retryOnce:
        self.increaseRetry.execute(testJobGroup.jobs)

    return testJobGroup
def testFilesWithoutOtherSubscriptions(self):
    """
    _testFilesWithoutOtherSubscriptions_

    Test the case where files only in the delete subscription
    can happen if cleanup of the other subscriptions is fast
    """
    testWorkflowA = Workflow(spec="specA.xml", owner="Steve",
                             name="wfA", task="Test")
    testWorkflowA.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    allFiles = []
    for i in range(500):
        testFile = File(str(i), size=1000, events=100,
                        locations=set(["T2_CH_CERN"]))
        testFile.create()
        allFiles.append(testFile)
        testFileset.addFile(testFile)
    testFileset.commit()

    testSubscriptionA = Subscription(fileset=testFileset,
                                     workflow=testWorkflowA,
                                     split_algo="SiblingProcessingBased",
                                     type="Processing")
    testSubscriptionA.create()

    splitter = SplitterFactory()
    deleteFactoryA = splitter(package="WMCore.WMBS",
                              subscription=testSubscriptionA)

    result = deleteFactoryA(files_per_job=50)
    self.assertEqual(len(result), 1,
                     "Error: Wrong number of job groups returned.")
    self.assertEqual(len(result[0].jobs), 10,
                     "Error: Wrong number of jobs returned.")

    return
def createTestJobGroup(self, commitFlag=True):
    """
    _createTestJobGroup_
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = WMBSFileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation("T2_CH_CERN")
    testFileA.setLocation("malpaquet")

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    testFileB.setLocation("T2_CH_CERN")
    testFileB.setLocation("malpaquet")

    testFileA.create()
    testFileB.create()

    testJobA = Job(name="TestJobA")
    testJobA.addFile(testFileA)
    testJobB = Job(name="TestJobB")
    testJobB.addFile(testFileB)

    testJobGroup.add(testJobA)
    testJobGroup.add(testJobB)

    if commitFlag:
        testJobGroup.commit()

    return testJobGroup
def injectTaskIntoWMBS(specUrl, workflowName, task, inputFileset, indent=0):
    """
    _injectTaskIntoWMBS_

    """
    print("%sinjecting %s" % (doIndent(indent), task.getPathName()))
    print("%s input fileset: %s" % (doIndent(indent), inputFileset.name))

    myWorkflow = Workflow(spec=specUrl, owner=arguments['Requestor'],
                          name=workflowName, task=task.getPathName())
    myWorkflow.create()

    mySubscription = Subscription(fileset=inputFileset, workflow=myWorkflow,
                                  split_algo=task.jobSplittingAlgorithm(),
                                  type=task.taskType())
    mySubscription.create()

    outputModules = task.getOutputModulesForStep(task.getTopStepName())
    for outputModuleName in outputModules.listSections_():
        print("%s configuring output module: %s" % (doIndent(indent),
                                                    outputModuleName))
        if task.taskType() == "Merge":
            outputFilesetName = "%s/merged-%s" % (task.getPathName(),
                                                  outputModuleName)
        else:
            outputFilesetName = "%s/unmerged-%s" % (task.getPathName(),
                                                    outputModuleName)

        print("%s output fileset: %s" % (doIndent(indent), outputFilesetName))
        outputFileset = Fileset(name=outputFilesetName)
        outputFileset.create()

        myWorkflow.addOutput(outputModuleName, outputFileset)

        # See if any other steps run over this output.
        print("%s searching for child tasks..." % (doIndent(indent)))
        for childTask in task.childTaskIterator():
            if childTask.data.input.outputModule == outputModuleName:
                injectTaskIntoWMBS(specUrl, workflowName, childTask,
                                   outputFileset, indent + 4)
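# A sketch of how the recursive injector above might be bootstrapped for a
# workload's top-level task.  `topLevelTask` stands for a WMTask object obtained
# from the workload; the fileset name here is illustrative, not taken from the
# original script:
#
#     inputFileset = Fileset(name="%s-input" % workflowName)
#     inputFileset.create()
#     injectTaskIntoWMBS(specUrl, workflowName, topLevelTask, inputFileset)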
def testUpdateFailedDoc(self):
    """
    _testUpdateFailedDoc_

    Verify that the update function will work correctly and not throw a 500
    error if the doc didn't make it into the database for some reason.
    """
    change = ChangeState(self.config, "changestate_t")

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute("site1", pnn="T2_CH_CERN")

    testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                            name="wf001", task=self.taskName)
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="FileBased")
    testSubscription.create()

    testFileA = File(lfn="SomeLFNA", events=1024, size=2048,
                     locations=set(["T2_CH_CERN"]))
    testFileA.create()
    testFileset.addFile(testFileA)
    testFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    testJobA = jobGroup.jobs[0]
    testJobA["user"] = "******"
    testJobA["group"] = "DMWM"
    testJobA["taskType"] = "Merge"
    testJobA["couch_record"] = str(testJobA["id"])

    change.propagate([testJobA], "new", "none")
    testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

    self.assertTrue("states" in testJobADoc)
    self.assertTrue("1" in testJobADoc["states"])
    return
def createTestJobGroup(self):
    """
    Creates a group of several jobs
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    # this previously re-set testFileA's location; it should locate testFileB
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    testWMBSFileset.addFile(testFileA)
    testWMBSFileset.addFile(testFileB)
    testWMBSFileset.commit()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    for _ in range(0, self.nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJobGroup.add(testJob)

    testJobGroup.commit()
    return testJobGroup
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create and then delete a job.  Use the job class's exists() method to
    determine if the job has been written to the database before it is
    created, after it has been created and after it has been deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id on success and False otherwise; a plain
    # ">= 0" comparison would also pass for False, so check explicitly.
    assert testJob.exists() is not False, \
        "ERROR: Job does not exist after it was created"

    testJob.delete()

    assert testJob.exists() is False, \
        "ERROR: Job exists after it was deleted"

    return
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Create a job and save it to the database.  Roll back the database
    transaction and verify that the job is no longer in the database.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileA.create()
    testFileB.create()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testJob = Job(name="TestJob", files=[testFileA, testFileB])

    assert testJob.exists() is False, \
        "ERROR: Job exists before it was created"

    testJob.create(group=testJobGroup)

    # exists() returns the job id on success and False otherwise; a plain
    # ">= 0" comparison would also pass for False, so check explicitly.
    assert testJob.exists() is not False, \
        "ERROR: Job does not exist after it was created"

    myThread.transaction.rollback()

    assert testJob.exists() is False, \
        "ERROR: Job exists after transaction was rolled back."

    return
def createJobCollection(self, name, nSubs, nFiles, workflowURL='test'):
    """
    _createJobCollection_

    Create a collection of jobs
    """
    myThread = threading.currentThread()

    testWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                            name=name, task="/TestWorkload/ReReco")
    testWorkflow.create()

    for sub in range(nSubs):
        nameStr = '%s-%i' % (name, sub)

        myThread.transaction.begin()

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        for f in range(nFiles):
            # pick a random site
            site = random.choice(self.sites)
            testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                            size=1024, events=10)
            testFile.setLocation(site)
            testFile.create()
            testFileset.addFile(testFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        myThread.transaction.commit()

    return
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100,
                       firstLumi=1, lastLumi=10, index=1, existingSub=None):
    """
    _generateFakeMCFile_

    Generates a fake MC file for testing production EventBased creation of
    jobs, it creates a single file subscription if no existing subscription
    is provided.
    """
    # MC comes with MCFakeFile(s)
    newFile = File("MCFakeFile-some-hash-%s" % str(index).zfill(5), size=1000,
                   events=numEvents,
                   locations=set(["somese.cern.ch"]))
    newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    newFile["first_event"] = firstEvent
    newFile["last_event"] = lastEvent
    newFile.create()

    if existingSub is None:
        singleMCFileset = Fileset(name="MCTestFileset-%i" % index)
        singleMCFileset.create()
        singleMCFileset.addFile(newFile)
        singleMCFileset.commit()

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")
        testWorkflow.create()

        singleMCFileSubscription = Subscription(fileset=singleMCFileset,
                                                workflow=testWorkflow,
                                                split_algo="EventBased",
                                                type="Production")
        singleMCFileSubscription.create()
        return singleMCFileSubscription
    else:
        existingSub['fileset'].addFile(newFile)
        existingSub['fileset'].commit()
        return existingSub
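# Hypothetical call pattern for the helper above: the second call attaches its
# MCFakeFile to the subscription created by the first instead of building a new
# fileset and workflow (the values are arbitrary):
#
#     sub = self.generateFakeMCFile(numEvents=1000, firstEvent=1,
#                                   lastEvent=1000, index=1)
#     sub = self.generateFakeMCFile(numEvents=1000, firstEvent=1001,
#                                   lastEvent=2000, index=2, existingSub=sub)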
def testC_HardLimitSplitting(self):
    """
    _testC_HardLimitSplitting_

    Test that we can specify an event limit, the algorithm shall take single
    lumi files with more events than the limit and mark them for failure
    """
    splitter = SplitterFactory()

    # Create 3 files, the one in the middle is a "bad" file
    testFileset = Fileset(name="FilesetA")
    testFileset.create()
    testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    # Settings are to split on job boundaries, to fail single lumis with
    # more than 800 events and to put 550 events per job
    jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
    self.assertTrue(jobs[3]['failedOnCreation'],
                    "The job processing the second file should be marked for failure")
    # The expected text must match the splitter's error message verbatim
    self.assertEqual(jobs[3]['failedReason'],
                     'File /this/is/file2 has a single lumi 1, in run 1 with too '
                     'many events 1000 and it woud take 12000 sec to run')
def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
    """
    _createTestSubscription_

    Create a set of test subscriptions for testing purposes.
    """
    if nSites > self.nSites:
        nSites = self.nSites

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    # Create a testWorkflow
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    # Create the files for each site
    for s in range(nSites):
        for i in range(nFiles):
            newFile = File(makeUUID(), size=1024, events=100,
                           locations=set(["T2_CH_CERN_%i" % s]))
            newFile.create()
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="MinFileBased",
                                    type="Processing")
    testSubscription.create()

    # Close the fileset
    if closeFileset:
        testFileset.markOpen(isOpen=False)

    return testSubscription
def testLoadOutputID(self):
    """
    _testLoadOutputID_

    Test whether we can load an output ID for a job
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job()
    testJob.create(group=testJobGroup)

    self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id)

    return
def createFileCollection(self, name, nSubs, nFiles, workflowURL='test', site=None):
    """
    _createFileCollection_

    Create a collection of files for splitting into jobs
    """
    myThread = threading.currentThread()

    testWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                            name=name, task="/TestWorkload/ReReco")
    testWorkflow.create()

    for sub in range(nSubs):
        nameStr = '%s-%i' % (name, sub)

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        for f in range(nFiles):
            # pick a random site
            if not site:
                tmpSite = 'se.%s' % (random.choice(self.sites))
            else:
                tmpSite = 'se.%s' % (site)
            testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                            size=1024, events=10)
            testFile.setLocation(tmpSite)
            testFile.create()
            testFileset.addFile(testFile)

        testFileset.commit()
        testFileset.markOpen(isOpen=0)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

    return
def testE_DisableHardLimitSplitting(self):
    """
    _testE_DisableHardLimitSplitting_

    Test that we can bypass an event limit when allowCreationFailure is set
    to False.  The algorithm shall take single lumi files with more events
    than the limit but not mark them for failure
    """
    splitter = SplitterFactory()

    # Create 3 files, the one in the middle is a "bad" file
    testFileset = Fileset(name="FilesetA")
    testFileset.create()
    testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "T1_US_FNAL_Disk")
    testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "T1_US_FNAL_Disk")
    testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "T1_US_FNAL_Disk")
    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.addFile(testFileC)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)

    # Same settings as the hard-limit test (550 events per job, single lumis
    # with more than 800 events exceed the job time limit), except that
    # allowCreationFailure=False disables the failure marking
    jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           allowCreationFailure=False,
                           performance=self.performanceParams)

    self.assertEqual(len(jobGroups), 1, "There should be only one job group")
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
    failedJobs = [job for job in jobs if job.get('failedOnCreation', False)]
    self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")
def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Create a JobGroup and then delete it.  Use the JobGroup's exists() method
    to determine if it exists before it is created, after it is created and
    after it is deleted.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = WMBSFileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)

    assert testJobGroup.exists() == False, \
        "ERROR: Job group exists before it was created"

    testJobGroup.create()

    # exists() returns the job group's ID once created, so check truthiness;
    # "exists() >= 0" would also pass for False, which compares equal to 0.
    assert testJobGroup.exists() != False, \
        "ERROR: Job group does not exist after it was created"

    testJobGroup.delete()

    assert testJobGroup.exists() == False, \
        "ERROR: Job group exists after it was deleted"

    testSubscription.delete()
    testFileset.delete()
    testWorkflow.delete()
    return
def createWorkflow(self, task):
    """
    Register a job into WMBS for each task through Workflows
    """
    specURL = self.getWorkflowURL(task)

    fileSet = Fileset(name=self.getFilesetName(task), is_open=True)
    fileSet.create()

    taskFlow = Workflow(spec=specURL, owner=self.owner,
                        owner_dn=self.owner_dn,
                        name=self.getWorkflowName(task), task=task.name())
    taskFlow.create()

    self.workflowDict[task.name()] = taskFlow

    # Insert the workflow spec URL into the task
    setattr(task.data.input.WMBS, 'WorkflowSpecURL', specURL)

    # If the job is a merge job, find the task it merges from, then find the
    # workflow for that task and assign it an output
    if hasattr(task.inputReference(), 'outputModule'):
        stepName = task.inputReference().inputStep.split('/')[-1]
        taskName = task.inputReference().inputStep.split('/')[-2]
        outputModule = task.inputReference().outputModule
        if taskName not in self.workflowDict:
            raise Exception('I am being asked to chain output for a task %s '
                            'which does not yet exist' % (taskName))
        outputWorkflow = self.workflowDict[taskName]
        outputWorkflow.addOutput(outputModule, fileSet)

    logging.info('Registered workflow for step %s' % (task.name()))

    return taskFlow, fileSet
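# A minimal usage sketch (hypothetical), assuming task objects come from a
# workload's taskIterator() as in the injection script later in this file:
for task in workload.taskIterator():
    taskFlow, fileSet = self.createWorkflow(task)
    logging.info('Created workflow %s with fileset %s'
                 % (taskFlow.name, fileSet.name))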
def setupRepackWorkflow(self):
    """
    _setupRepackWorkflow_

    Populate WMBS with a repack-like workflow; every subscription must be
    unfinished at first.
    """
    workflowName = 'Repack_Run481516_StreamZ'
    mergeTasks = ['RepackMergewrite_QuadElectron_RAW',
                  'RepackMergewrite_TriPhoton_RAW',
                  'RepackMergewrite_SingleNeutrino_RAW']

    self.stateMap = {'Merge' : [],
                     'Processing Done' : []}
    self.orderedStates = ['Merge', 'Processing Done']

    # Populate WMStats
    self.wmstatsWriter.insertGenericRequest({'_id' : workflowName})
    self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

    # Create a wmspec on disk
    workload = newWorkload(workflowName)
    repackTask = workload.newTask('Repack')
    for task in mergeTasks:
        repackTask.addTask(task)
    repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')
    specPath = os.path.join(self.testDir, 'Repack.pkl')
    workload.save(specPath)

    # Populate WMBS
    topFileset = Fileset(name = 'TestStreamerFileset')
    topFileset.create()
    options = {'spec' : specPath, 'owner' : 'ItsAMeMario',
               'name' : workflowName, 'wfType' : 'tier0'}
    topLevelWorkflow = Workflow(task = '/%s/Repack' % workflowName, **options)
    topLevelWorkflow.create()
    topLevelSub = Subscription(topFileset, topLevelWorkflow)
    topLevelSub.create()
    self.stateMap['Merge'].append(topFileset)

    for task in mergeTasks:
        mergeWorkflow = Workflow(task = '/%s/Repack/%s' % (workflowName, task), **options)
        mergeWorkflow.create()
        unmergedFileset = Fileset(name = 'TestUnmergedFileset%s' % task)
        unmergedFileset.create()
        mergeSub = Subscription(unmergedFileset, mergeWorkflow)
        mergeSub.create()
        self.stateMap['Processing Done'].append(unmergedFileset)

    cleanupWorkflow = Workflow(task = '/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                               **options)
    cleanupWorkflow.create()
    unmergedFileset = Fileset(name = 'TestUnmergedFilesetToCleanup')
    unmergedFileset.create()
    cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
    cleanupSub.create()

    return
def testDifferentSubscriptionIDs(self):
    """
    _testDifferentSubscriptionIDs_

    Make sure that the merge splitting still runs if the subscription ID
    is not equal to the workflow ID.
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()

    # Create an extra workflow and two extra subscriptions so that the
    # subscription and workflow IDs of the merge subscription get out of step
    dummyWorkflow = Workflow(name="dummyWorkflow", spec="bunk49",
                             owner="Steve", task="Test2")
    dummyWorkflow.create()
    dummyFileset = Fileset(name="dummyFileset")
    dummyFileset.create()
    dummySubscription1 = Subscription(fileset=dummyFileset,
                                      workflow=dummyWorkflow,
                                      split_algo="ParentlessMergeBySize")
    dummySubscription2 = Subscription(fileset=dummyFileset,
                                      workflow=dummyWorkflow,
                                      split_algo="ParentlessMergeBySize")
    dummySubscription1.create()
    dummySubscription2.create()
    myThread.transaction.commit()

    self.stuffWMBS()
    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)
    result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                        max_merge_events=999999999, merge_across_runs=False)
    self.assertEqual(len(result), 1)
    jobGroup = result[0]
    self.assertEqual(len(jobGroup.jobs), 2)
    return
def createJobs(self, nJobs):
    """
    Create a jobGroup containing a series of jobs for submission
    """
    testWorkflow = Workflow(spec="dummy", owner="mnorman", name="dummy",
                            task="basicWorkload/Production")
    testWorkflow.create()

    # Create Fileset, Subscription, jobGroup
    testFileset = Fileset(name="dummy")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    # Create jobs (jobID avoids shadowing the builtin "id")
    for jobID in range(nJobs):
        testJob = Job(name='Job_%i' % (jobID))
        testJob['owner'] = "mnorman"
        testJob['location'] = 'Xanadu'
        testJob.create(testJobGroup)
        testJobGroup.add(testJob)

    testFileset.commit()
    testJobGroup.commit()

    return testJobGroup
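# A minimal usage sketch (hypothetical, not from the original tests):
# create ten jobs and check that they all landed in the returned job group.
testJobGroup = self.createJobs(nJobs=10)
assert len(testJobGroup.jobs) == 10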
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for i in range(nFiles):
        newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                  i, lumisPerFile, 'T1_US_FNAL_Disk')
        newFile.create()
        testFileset.addFile(newFile)
    if twoSites:
        for i in range(nFiles):
            newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T2_CH_CERN')
            newFile.create()
            testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
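# A minimal usage sketch (hypothetical, not one of the original tests):
# five single-lumi files of 100 events each, split with the same
# EventAwareLumiBased algorithm. The events_per_job value is arbitrary and
# performanceParams is assumed to be the fixture used elsewhere in this file.
testSubscription = self.createSubscription(nFiles=5, lumisPerFile=1)
jobFactory = SplitterFactory()(package="WMCore.WMBS",
                               subscription=testSubscription)
jobGroups = jobFactory(events_per_job=100,
                       performance=self.performanceParams)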
    # (fragment: the start of injectFilesFromDBS, including the beginning of
    # the File(...) constructor call, is missing from this excerpt)
                  events = dbsResult["NumberOfEvents"],
                  checksums = {"cksum": dbsResult["Checksum"]},
                  locations = set(['srm.ciemat.es', 'storm-se-01.ba.infn.it',
                                   'storage01.lcg.cscs.ch']))
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    inputFileset.commit()
    inputFileset.markOpen(False)
    return

for workloadTask in workload.taskIterator():
    print "Workload ", workloadTask
    inputFileset = Fileset(name = workloadTask.getPathName())
    inputFileset.create()

    inputDataset = workloadTask.inputDataset()
    inputDatasetPath = "/%s/%s/%s" % (inputDataset.primary,
                                      inputDataset.processed,
                                      inputDataset.tier)
    injectFilesFromDBS(inputFileset, inputDatasetPath)

    injectTaskIntoWMBS(os.path.join(os.getcwd(), workloadName, workloadFile),
                       workloadName, workloadTask, inputFileset)
class EventBasedTest(unittest.TestCase):
    """
    _EventBasedTest_

    Test event based job splitting.
    """

    def setUp(self):
        """
        _setUp_

        Create several filesets and subscriptions: single-file and
        multiple-file filesets, plus multiple-run, single-run and
        single-run/multiple-lumi variants.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", pnn="T2_CH_CERN")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(i, *[45 + i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size=1000, events=100,
                       locations="T2_CH_CERN")
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileRunset = Fileset(name="TestFileset3")
        self.multipleFileRunset.create()
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(i / 3, *[45]))
            newFile.create()
            self.multipleFileRunset.addFile(newFile)
        self.multipleFileRunset.commit()

        self.singleRunFileset = Fileset(name="TestFileset4")
        self.singleRunFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleRunFileset.addFile(newFile)
        self.singleRunFileset.commit()

        self.singleRunMultipleLumi = Fileset(name="TestFileset5")
        self.singleRunMultipleLumi.create()
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(1, *[45 + i]))
            newFile.create()
            self.singleRunMultipleLumi.addFile(newFile)
        self.singleRunMultipleLumi.commit()

        testWorkflow = Workflow(spec="spec.xml", owner="mnorman",
                                name="wf001", task="Test")
        testWorkflow.create()

        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.multipleRunSubscription = Subscription(
            fileset=self.multipleFileRunset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunSubscription = Subscription(
            fileset=self.singleRunFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunMultipleLumiSubscription = Subscription(
            fileset=self.singleRunMultipleLumi,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleRunSubscription.create()
        self.singleRunSubscription.create()
        self.singleRunMultipleLumiSubscription.create()
        return

    def tearDown(self):
        """
        _tearDown_

        Tear down the WMBS architecture.
        """
        self.testInit.clearDatabase()
        return

    def testExactRuns(self):
        """
        _testExactRuns_

        Test run based job splitting when the number of files per job is
        exactly the same as the number of files in the input fileset.
""" splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." return def testMoreRuns(self): """ _testMoreEvents_ Test run based job splitting when the number of runs per job is greater than the number of runs in the input file. """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleFileSubscription) jobGroups = jobFactory(files_per_job=2) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." return def testMultipleRuns(self): """ _testMultipleRuns_ Test run based job splitting when the number of runs is equal to the number in each input file, with multiple files """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(files_per_job=1) assert len(jobGroups) == 10, \ "ERROR: JobFactory didn't return one JobGroup per run." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't put each run in a file." self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 1) return def testMultipleRunsCombine(self): """ _testMultipleRunsCombine_ Test run based job splitting when the number of jobs is less then the number of files, with multiple files """ splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleRunSubscription) jobGroups = jobFactory(files_per_job=2) assert len(jobGroups) == 4, \ "ERROR: JobFactory didn't return one JobGroup per run." assert len(jobGroups[1].jobs) == 2, \ "ERROR: JobFactory didn't put only one job in the first job" #Last one in the queue should have one job, previous two (three files per run) self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 1) self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 2) return def testSingleRunsCombineUneven(self): """ _testSingleRunsCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. """ #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleRunSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return def testPersistSingleRunsCombineUneven(self): """ _testPerisistSingleRunsCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. 
""" #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter(package="WMCore.WMBS", subscription=self.singleRunSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return def testSingleRunsMultipleLumiCombineUneven(self): """ _testSingleRunsMultipeLumiCombineUneven_ Test run based job splitting when the number of jobs is less then and indivisible by the number of files, with multiple files. """ #This should return two jobs, one with 8 and one with 2 files splitter = SplitterFactory() jobFactory = splitter( package="WMCore.WMBS", subscription=self.singleRunMultipleLumiSubscription) jobGroups = jobFactory(files_per_job=8) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 2) self.assertEqual(len(jobGroups[0].jobs.pop().getFiles(type="lfn")), 8) return
def __call__(self, parameters):
    """
    Perform the work required with the given parameters
    """
    DefaultSlave.__call__(self, parameters)

    # Handle the message
    message = self.messageArgs

    # Lock on the running feeders list
    myThread = threading.currentThread()
    myThread.runningFeedersLock.acquire()

    # Create an empty fileset if one with this name doesn't exist yet
    filesetName = message["dataset"]
    feederType = message["FeederType"]
    fileType = message["FileType"]
    startRun = message["StartRun"]

    logging.debug("Dataset " + filesetName + " arrived")

    fileset = Fileset(name = filesetName + ':' + feederType + ':' +
                             fileType + ':' + startRun)

    # Check if the fileset is already there
    if fileset.exists() == False:
        # Empty fileset creation
        fileset.create()
        fileset.setLastUpdate(0)
        logging.info("Fileset %s with id %s is added"
                     % (fileset.name, str(fileset.id)))

        # Check if there is a running feeder
        if feederType in myThread.runningFeeders:
            logging.info("HAVE FEEDER " + feederType + " RUNNING")
            logging.info(myThread.runningFeeders[feederType])
        else:
            logging.info("NO FEEDER " + feederType + " RUNNING")

            # Check if we have a feeder in the database
            if self.queries.checkFeeder(feederType):
                # Have feeder, get its info
                logging.info("Getting Feeder from DB")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId
            else:
                # Create feeder
                logging.info("Adding Feeder to DB")
                self.queries.addFeeder(feederType, "StatePath")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId

        # Fileset/Feeder association
        self.queries.addFilesetToManage(fileset.id,
                                        myThread.runningFeeders[feederType])
        logging.info("Fileset %s is added to feeder %s"
                     % (fileset.id, myThread.runningFeeders[feederType]))
    else:
        # If the fileset already exists, a new subscription
        # will be created for its workflow
        logging.info("Fileset exists: Subscription will be created for it")

        # Open it if it is closed
        fileset.load()
        if fileset.open == False:
            fileset.markOpen(True)

        logging.info("Getting Feeder from DB")
        feederId = self.queries.getFeederId(feederType)
        logging.info(feederId)
        myThread.runningFeeders[feederType] = feederId

        self.queries.addFilesetToManage(fileset.id,
                                        myThread.runningFeeders[feederType])
        logging.info("Fileset %s is added to feeder %s"
                     % (fileset.id, myThread.runningFeeders[feederType]))

    myThread.runningFeedersLock.release()
    myThread.msgService.finish()
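# A minimal sketch of the message payload this handler expects. The field
# names are taken from the lookups above; the concrete values are
# hypothetical examples, not from the original code.
exampleMessage = {"dataset"    : "/Cosmics/Run2010A-v1/RAW",
                  "FeederType" : "DBS",
                  "FileType"   : "RAW",
                  "StartRun"   : "142000"}
# self.messageArgs would carry a dict like this when __call__ fires.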