def setUp(self):
    """
    _setUp_

    Install the DBS3Buffer schema into the database, register the two
    storage locations used by the tests and connect to PhEDEx.

    The database connection is opened with destroyAllDatabase = True and
    only the WMComponent.DBS3Buffer schema is installed.  Each test run
    gets two dataset paths built from fresh UUIDs.
    """
    self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
    self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection(destroyAllDatabase = True)
    self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer"],
                            useDefault = False)

    # current_thread() is the supported spelling; the camelCase alias
    # currentThread() is deprecated since Python 3.10.
    myThread = threading.current_thread()
    daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                            logger = myThread.logger,
                            dbinterface = myThread.dbi)

    locationAction = daofactory(classname = "DBSBufferFiles.AddLocation")
    for siteName in ("srm-cms.cern.ch", "se.fnal.gov"):
        locationAction.execute(siteName = siteName)

    self.testFilesA = []
    self.testFilesB = []
    self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
    self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()
    self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")

    return
def createTestJob(subscriptionType="Merge"):
    """
    _createTestJob_

    Build a job that takes two files as input and return it.  The
    workflow, fileset, subscription and job group the job depends on are
    created along the way.

    subscriptionType is handed to the Subscription as its type.
    """
    workflow = Workflow(spec=makeUUID(), owner="Simon",
                        name=makeUUID(), task="Test")
    workflow.create()

    wmbsFileset = Fileset(name="TestFileset")
    wmbsFileset.create()

    subscription = Subscription(fileset=wmbsFileset,
                                workflow=workflow,
                                type=subscriptionType)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Both input files live in run 1, each with its own lumi.
    inputFiles = []
    for lfn, lumi in (("/this/is/a/lfnA", 45), ("/this/is/a/lfnB", 46)):
        inputFile = File(lfn=lfn, size=1024, events=10)
        inputFile.addRun(Run(1, lumi))
        inputFiles.append(inputFile)
    for inputFile in inputFiles:
        inputFile.create()

    job = Job(name=makeUUID(), files=inputFiles)
    job["couch_record"] = "somecouchrecord"
    job["location"] = "test.site.ch"
    job.create(group=jobGroup)
    job.associateFiles()

    return job
def testSubscription(self):
    """
    _testSubscription_

    Inject two freshly named datasets at T1_US_FNAL_MSS, subscribe both
    of them to T1_UK_RAL_MSS and check that exactly one request with an
    ID comes back from the subscription API.
    """
    datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
    datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()

    for dataset in (datasetA, datasetB):
        dropXML = XMLDrop.makePhEDExDrop(self.dbsTestUrl, dataset)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", dropXML)

    testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS", "Saturn")
    subscriptionXML = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                       testSub.getDatasetPaths())
    response = self.phedexApi.subscribe(testSub, subscriptionXML)
    createdRequests = response["phedex"]["request_created"]

    self.assertEqual(len(createdRequests), 1,
                     "Error: Wrong number of request IDs")
    self.assertTrue("id" in createdRequests[0],
                    "Error: Missing request ID")
    return
def testLotsOfAncestors(self):
    """
    _testLotsOfAncestors_

    Create a file with 15 parents with each parent having 100 parents to
    verify that the query to return grandparents works correctly.

    The test is currently disabled: it raises nose.SkipTest before doing
    any work, so everything below the raise is kept only for the day it
    is switched back on.
    """
    raise nose.SkipTest

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     checksums={"cksum": "1"}, locations="se1.fnal.gov")
    testFileA.create()

    # range() works on both Python 2 and 3 (xrange is Python 2 only);
    # the loop indices are unused, so the nested loops no longer share
    # one name.
    for _ in range(15):
        testParent = File(lfn=makeUUID(), size=1024, events=10,
                          checksums={"cksum": "1"}, locations="se1.fnal.gov")
        testParent.create()
        testFileA.addParent(testParent["lfn"])

        for _ in range(100):
            testGParent = File(lfn=makeUUID(), size=1024, events=10,
                               checksums={"cksum": "1"}, locations="se1.fnal.gov")
            testGParent.create()
            testParent.addParent(testGParent["lfn"])

    assert len(testFileA.getAncestors(level=2, type="lfn")) == 1500, \
        "ERROR: Incorrect grand parents returned"

    return
def testMask(self):
    """
    _testMask_

    Save a job whose mask carries two run/lumi entries, load the job back
    and check the stored mask as well as filterRunLumisByMask().
    """
    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    fileA = File(lfn=makeUUID(), locations="test.site.ch")
    fileB = File(lfn=makeUUID(), locations="test.site.ch")
    fileA.create()
    fileB.create()

    fileset.addFile([fileA, fileB])
    fileset.commit()
    subscription.acquireFiles([fileA, fileB])

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    storedJob = Job()
    for run, lumis in ((100, [101, 102]), (200, [201, 202])):
        storedJob['mask'].addRunAndLumis(run=run, lumis=lumis)
    storedJob.create(group=jobGroup)

    loadJob = Job(id=storedJob.exists())
    loadJob.loadData()

    maskRuns = loadJob['mask'].getRunAndLumis()
    self.assertEqual(len(maskRuns), 2)
    self.assertEqual(maskRuns[100], [[101, 102]])
    self.assertEqual(maskRuns[200], [[201, 202]])

    # Run 100 has more lumis than the mask; run 300 is not in the mask.
    bigRun = Run(100, 101, 102, 103, 104)
    badRun = Run(300, 1001, 1002)
    filtered = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(filtered), 1)
    alteredRun = filtered.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    # Two identical runs outside the mask; only checks that the call
    # goes through, the result is not inspected.
    run0 = Run(300, 1001, 1002)
    run1 = Run(300, 1001, 1002)
    loadJob['mask'].filterRunLumisByMask([run0, run1])

    return
def testTime(self):
    """
    _testTime_

    Time how long it takes to generate 100000 UUIDs and print the result.
    Nothing is asserted.
    """
    # time.clock() was removed in Python 3.8; timeit.default_timer is
    # available on both Python 2 and Python 3.
    from timeit import default_timer

    nUIDs = 100000
    startTime = default_timer()
    for _ in range(nUIDs):
        makeUUID()
    elapsed = default_timer() - startTime
    print("We can make %i UUIDs in %f seconds" %(nUIDs, elapsed))
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    taskType='Processing', name=None, wfPrio=1, changeState=None):
    """
    _createJobGroups_

    Create one workflow and, under it, nSubs filesets / subscriptions /
    job groups, each filled with nJobs jobs through self.makeNJobs().

    name is the workflow name (a UUID is used when it is None) and wfPrio
    its priority.  task and site are forwarded to makeNJobs.

    changeState, when given, is an instance of the ChangeState class; it
    is used to move the jobs of every created group from 'new' to
    'created'.

    Returns the list of job groups.
    """
    workflowName = makeUUID() if name is None else name

    workflow = Workflow(spec=workloadSpec, owner="tapas",
                        name=workflowName, task="basicWorkload/Production",
                        priority=wfPrio)
    workflow.create()

    createdGroups = []
    for _ in range(nSubs):
        # One UUID names both the fileset and the jobs of this subscription
        subName = makeUUID()

        fileset = Fileset(name=subName)
        fileset.create()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    type=taskType,
                                    split_algo="FileBased")
        subscription.create()

        jobGroup = JobGroup(subscription=subscription)
        jobGroup.create()

        self.makeNJobs(name=subName, task=task,
                       nJobs=nJobs,
                       jobGroup=jobGroup,
                       fileset=fileset,
                       sub=subscription.exists(),
                       site=site)

        fileset.commit()
        jobGroup.commit()
        createdGroups.append(jobGroup)

    if changeState:
        for jobGroup in createdGroups:
            changeState.propagate(jobGroup.jobs, 'created', 'new')

    return createdGroups
def setUp(self):
    """
    _setUp_

    Create four filesets and one "FixedDelay" Processing subscription for
    each of them:

      multipleFileFileset  -- ten files, file i in run i with lumi 45 + i
      singleFileFileset    -- a single file in run 1, lumi 45
      multipleFileLumiset  -- ten files in run 1, three files per lumi
      singleLumiFileset    -- ten files, all in run 1, lumi 45
    """
    self.multipleFileFileset = Fileset(name = "TestFileset1")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100)
        newFile.addRun(Run(i, 45 + i))
        self.multipleFileFileset.addFile(newFile)

    self.singleFileFileset = Fileset(name = "TestFileset2")
    newFile = File("/some/file/name", size = 1000, events = 100)
    newFile.addRun(Run(1, 45))
    self.singleFileFileset.addFile(newFile)

    self.multipleFileLumiset = Fileset(name = "TestFileset3")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100)
        # Floor division keeps the lumi number an integer on Python 3 as
        # well; plain "/" would yield lumis such as 45.333.
        newFile.addRun(Run(1, 45 + i // 3))
        self.multipleFileLumiset.addFile(newFile)

    self.singleLumiFileset = Fileset(name = "TestFileset4")
    for i in range(10):
        newFile = File(makeUUID(), size = 1000, events = 100)
        newFile.addRun(Run(1, 45))
        self.singleLumiFileset.addFile(newFile)

    testWorkflow = Workflow()
    self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                 workflow = testWorkflow,
                                                 split_algo = "FixedDelay",
                                                 type = "Processing")
    self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                               workflow = testWorkflow,
                                               split_algo = "FixedDelay",
                                               type = "Processing")
    self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset,
                                                 workflow = testWorkflow,
                                                 split_algo = "FixedDelay",
                                                 type = "Processing")
    self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset,
                                               workflow = testWorkflow,
                                               split_algo = "FixedDelay",
                                               type = "Processing")

    return
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Put three jobs into the states "executing", "complete" and "new" and
    check that the Monitoring.ListRunningJobs DAO reports the executing
    and the new one with the right state and couch record.
    """
    workflow = Workflow(spec = makeUUID(), owner = "Steve",
                        name = makeUUID(), task="Test")
    workflow.create()

    fileset = Fileset(name = "TestFileset")
    fileset.create()

    subscription = Subscription(fileset = fileset,
                                workflow = workflow,
                                type = "Processing")
    subscription.create()

    jobGroup = JobGroup(subscription = subscription)
    jobGroup.create()

    testJobs = []
    for state in ("executing", "complete", "new"):
        job = Job(name = makeUUID(), files = [])
        job["couch_record"] = makeUUID()
        job.create(group = jobGroup)
        job["state"] = state
        testJobs.append(job)
    testJobA, testJobB, testJobC = testJobs

    changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
    changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

    runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()

    assert len(runningJobs) == 2, \
        "Error: Wrong number of running jobs returned."

    for runningJob in runningJobs:
        # Anything that is not job A has to be job C.
        if runningJob["job_name"] == testJobA["name"]:
            expected = testJobA
        else:
            assert runningJob["job_name"] == testJobC["name"], \
                "Error: Running job has wrong name."
            expected = testJobC

        assert runningJob["state"] == expected["state"], \
            "Error: Running job has wrong state."
        assert runningJob["couch_record"] == expected["couch_record"], \
            "Error: Running job has wrong couch record."

    return
def createFile():
    """
    _createFile_

    Create a file with some random metadata: a UUID as LFN, a random size
    and event count, one parent file and a UUID as location.  The event
    range is zeroed and the ID is fixed to 1.
    """
    lfn = makeUUID()
    size = random.randrange(1024, 1048576, 1024)
    events = random.randrange(10, 100000, 50)
    parent = File(lfn = makeUUID())
    location = makeUUID()

    randomFile = File(lfn = lfn, size = size, events = events,
                      parents = [parent], locations = location)

    for key, value in (("first_event", 0), ("last_event", 0), ("id", 1)):
        randomFile[key] = value

    return randomFile
def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                    taskType='Processing', name=None):
    """
    _createJobGroups_

    Create one workflow and, under it, nSubs filesets / subscriptions /
    job groups, each filled with nJobs jobs through self.makeNJobs().

    name is the workflow name (a UUID is used when it is None).  task and
    site are forwarded to makeNJobs.

    Returns the list of job groups.
    """
    workflowName = makeUUID() if name is None else name

    workflow = Workflow(spec=workloadSpec, owner="tapas",
                        name=workflowName, task="basicWorkload/Production",
                        priority=1)
    workflow.create()

    createdGroups = []
    for _ in range(nSubs):
        # One UUID names both the fileset and the jobs of this subscription
        subName = makeUUID()

        fileset = Fileset(name=subName)
        fileset.create()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    type=taskType,
                                    split_algo="FileBased")
        subscription.create()

        jobGroup = JobGroup(subscription=subscription)
        jobGroup.create()

        self.makeNJobs(name=subName, task=task,
                       nJobs=nJobs,
                       jobGroup=jobGroup,
                       fileset=fileset,
                       sub=subscription.exists(),
                       site=site)

        fileset.commit()
        jobGroup.commit()
        createdGroups.append(jobGroup)

    return createdGroups
def testC_Profile(self):
    """
    _Profile_

    DON'T RUN THIS!

    Builds a giant job set (10 subscriptions, 1000 jobs, 10 files) and
    profiles one CleanCouchPoller cycle, printing the statistics sorted
    by cumulative time.
    """
    import cProfile
    import pstats

    config = self.getConfig()
    jobList = self.createGiantJobSet(name=makeUUID(), config=config,
                                     nSubs=10, nJobs=1000, nFiles=10)

    # The profiled statement looks this object up by name in locals().
    cleanCouch = CleanCouchPoller(config=config)
    cleanCouch.setup()

    statsFile = "testStats.stat"
    cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename=statsFile)

    stats = pstats.Stats(statsFile)
    stats.sort_stats('cumulative')
    stats.print_stats()

    return
def getBlock(self, newFile, location, skipOpenCheck=False):
    """
    _getBlock_

    Retrieve a block if one exists with matching datasetpath/location and
    is open.  If no such block is found create and return a new one.

    A cached block that matches but is no longer open is marked as
    pending and closed, unless skipOpenCheck is set, in which case the
    matching block is returned regardless.  A newly created block is
    added to the block cache and gets the parent dataset from the
    dataset parentage cache, if there is one.
    """
    datasetpath = newFile["datasetPath"]

    for cachedBlock in self.blockCache.values():
        if not (datasetpath == cachedBlock.getDatasetPath()
                and location == cachedBlock.getLocation()):
            continue

        stillOpen = self.isBlockOpen(newFile=newFile, block=cachedBlock)
        if stillOpen or skipOpenCheck:
            return cachedBlock

        # Block isn't open anymore.  Mark it as pending so that it gets
        # uploaded.
        cachedBlock.setPendingAndCloseBlock()

    # A suitable open block does not exist.  Create a new one.
    blockname = "%s#%s" % (datasetpath, makeUUID())
    freshBlock = DBSBufferBlock(name=blockname,
                                location=location,
                                datasetpath=datasetpath)

    parentDataset = self.datasetParentageCache.get(datasetpath)
    if parentDataset:
        freshBlock.addDatasetParent(parentDataset)
        logging.debug("Get block: Child dataset %s, Parent dataset %s", datasetpath, parentDataset)

    self.blockCache[blockname] = freshBlock
    return freshBlock
def testTestNonProxySplitting(self):
    """
    _TestNonProxySplitting_

    Run one JobCreatorPoller cycle with a single worker thread and check
    that splitting without a proxy yields exactly one created Processing
    job and no Merge jobs.
    """
    config = self.getConfig()
    config.JobCreator.workerThreads = 1

    wmbsName = makeUUID()
    workloadName = 'TestWorkload'

    # The returned workload object is not needed here, only the call.
    self.createWorkload(workloadName=workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest',
                                'TestWorkload', 'WMSandbox',
                                'WMWorkload.pkl')

    self.stuffWMBS(workflowURL=workloadPath, name=wmbsName)

    jobCreator = JobCreatorPoller(config=config)
    jobCreator.algorithm()

    getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")

    processingJobs = getJobsAction.execute(state='Created', jobType="Processing")
    self.assertEqual(len(processingJobs), 1)

    mergeJobs = getJobsAction.execute(state='Created', jobType="Merge")
    self.assertEqual(len(mergeJobs), 0)

    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing.

    nFiles files are built with self.createFile() at site 'blenheim';
    with twoSites a second batch of nFiles files at 'malpaquet' is added.
    The fileset is attached to self.testWorkflow in an
    "EventAwareLumiBased" Processing subscription, which is returned.
    """
    baseName = makeUUID()
    fileset = Fileset(name=baseName)

    # (LFN pattern, site) for every batch of files to generate
    batches = [('%s_%i', 'blenheim')]
    if twoSites:
        batches.append(('%s_%i_2', 'malpaquet'))

    for lfnPattern, site in batches:
        for index in range(nFiles):
            fileset.addFile(self.createFile(lfnPattern % (baseName, index),
                                            nEventsPerFile, index,
                                            lumisPerFile, site))

    return Subscription(fileset=fileset,
                        workflow=self.testWorkflow,
                        split_algo="EventAwareLumiBased",
                        type="Processing")
def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
    """
    _createJob_

    Create an express job processing the passed in list of streamers.

    A job group is opened the first time this is called.  Every streamer
    (a mapping with 'id', 'lfn' and 'location') becomes an input file of
    the new job, and time, disk and memory estimates are attached to it.
    """
    if not self.createdGroup:
        self.newGroup()
        self.createdGroup = True

    self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

    for streamer in streamerList:
        inputFile = File(id = streamer['id'],
                         lfn = streamer['lfn'])
        inputFile.setLocation(streamer['location'], immediateSave = False)
        self.currentJob.addFile(inputFile)

    # job time based on
    #   - 5 min initialization (twice)
    #   - 0.5MB/s repack speed
    #   - reco with timePerEvent
    #   - checksum calculation at 5MB/s
    #   - stageout at 5MB/s
    initTime = 600
    repackTime = jobSize/500000
    recoTime = jobEvents*timePerEvent
    outputTime = (jobEvents*sizePerEvent*2)/5000000
    jobTime = initTime + repackTime + recoTime + outputTime

    # job disk based on
    #   - streamer or RAW on local disk (factor 1)
    #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
    jobDisk = jobSize/1024 + jobEvents*sizePerEvent

    # Both estimates are capped: 47 hours and 20000000 respectively.
    self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                         disk = min(jobDisk, 20000000),
                                         memory = memoryRequirement)

    return
def __call__(self):
    """
    __call__

    Generate some random data: a dataset named after a fresh UUID with a
    random number of files, followed by two DBS uploader cycles.  If a
    cycle fails the uploader is closed and the error is re-raised.
    """
    # Dashes in the UUID are replaced so the only dash is the one in
    # front of the version suffix.
    uuidPart = "ThisIsATest_%s" % (makeUUID())

    # Generate somewhere between 10 and 2000 files
    nFiles = random.randint(10, 2000)
    name = '%s-v0' % uuidPart.replace('-', '_')

    self.getFiles(name = name, nFiles = nFiles)

    print("Inserting %i files for dataset %s" % (nFiles * 2, name))

    # Run the uploader twice; the second pass is just to make sure.
    for _ in range(2):
        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

    return
def testD_Timing(self):
    """
    _Timing_

    This is to see how fast things go.

    Builds a giant job set (10 subscriptions, 1000 jobs, 10 files), times
    one TaskArchiverPoller cycle, checks that the WMBS job, subscription,
    jobgroup and file tables are empty afterwards and that fileset 1 is
    gone, then logs the elapsed time.
    """
    # current_thread() is the supported spelling; the camelCase alias
    # currentThread() is deprecated since Python 3.10.
    myThread = threading.current_thread()

    name = makeUUID()
    config = self.getConfig()
    self.createGiantJobSet(name=name, config=config, nSubs=10,
                           nJobs=1000, nFiles=10)

    testTaskArchiver = TaskArchiverPoller(config=config)

    startTime = time.time()
    testTaskArchiver.algorithm()
    stopTime = time.time()

    for query in ("SELECT * FROM wmbs_job",
                  "SELECT * FROM wmbs_subscription",
                  "SELECT * FROM wmbs_jobgroup",
                  "SELECT * FROM wmbs_file_details"):
        rows = myThread.dbi.processData(query)[0].fetchall()
        self.assertEqual(len(rows), 0)

    testWMBSFileset = Fileset(id=1)
    self.assertEqual(testWMBSFileset.exists(), False)

    logging.info("TaskArchiver took %f seconds", (stopTime - startTime))
def testProfilePoller(self):
    """
    Profile your performance

    You shouldn't be running this normally because it doesn't do anything

    Creates 5 subscriptions with 1500 files each, profiles one
    JobCreatorPoller cycle, checks that one Processing job per file was
    created and prints the top of the profile sorted by cumulative time.
    """
    nSubs = 5
    nFiles = 1500
    collectionName = makeUUID()
    workloadName = 'TestWorkload'

    # The returned workload object is not needed here, only the call.
    self.createWorkload(workloadName=workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest',
                                'TestWorkload', 'WMSandbox',
                                'WMWorkload.pkl')

    self.createJobCollection(name=collectionName, nSubs=nSubs, nFiles=nFiles,
                             workflowURL=workloadPath)

    config = self.getConfig()

    # The profiled statement looks this object up by name in locals().
    testJobCreator = JobCreatorPoller(config=config)
    statsFile = "testStats.stat"
    cProfile.runctx("testJobCreator.algorithm()", globals(), locals(), filename=statsFile)

    getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
    createdJobs = getJobsAction.execute(state='Created', jobType="Processing")

    time.sleep(10)

    self.assertEqual(len(createdJobs), nSubs * nFiles)

    stats = pstats.Stats(statsFile)
    stats.sort_stats('cumulative')
    stats.print_stats(.2)

    return
def testProfileWorker(self):
    """
    Profile where the work actually gets done

    You shouldn't be running this one either, since it doesn't test anything.

    Creates 5 subscriptions with 500 files each, profiles one
    algorithm() call that is handed the five subscription IDs as its
    parameters and prints the top of the profile sorted by cumulative
    time.
    """
    name = makeUUID()
    nSubs = 5
    nFiles = 500
    workloadName = 'TestWorkload'

    # The returned workload object is not needed here, only the call.
    self.createWorkload(workloadName=workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest',
                                'TestWorkload', 'WMSandbox',
                                'WMWorkload.pkl')

    self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles,
                             workflowURL=workloadPath)

    config = self.getConfig()

    configDict = {"couchURL": config.JobStateMachine.couchurl,
                  "couchDBName": config.JobStateMachine.couchDBName,
                  'jobCacheDir': config.JobCreator.jobCacheDir,
                  'defaultJobType': config.JobCreator.defaultJobType}

    subs = [{"subscription": 1}, {"subscription": 2},
            {"subscription": 3}, {"subscription": 4},
            {"subscription": 5}]

    # The profiled statement looks these objects up by name in locals().
    testJobCreator = JobCreatorPoller(**configDict)

    # Hand the prepared subscription list to the profiled call.  The
    # statement used to name "input", which is not a local here and so
    # resolved to the builtin function instead of the list.
    cProfile.runctx("testJobCreator.algorithm(parameters = subs)", globals(), locals(),
                    filename="workStats.stat")

    p = pstats.Stats('workStats.stat')
    p.sort_stats('cumulative')
    p.print_stats(.2)

    return
def create(self, group):
    """
    _create_

    Write the job to the database as a member of the given job group.

    Nothing happens if the job already carries an ID.  A job without a
    name is given a UUID.  After the insert the job's mask is saved under
    the job ID and the job's files and work units are associated.
    """
    if self["id"] is not None:
        return

    # Whatever beginTransaction() hands back is passed to
    # commitTransaction() at the end.
    transactionMarker = self.beginTransaction()

    self["jobgroup"] = group.id
    if self["name"] is None:
        self["name"] = makeUUID()

    newJobAction = self.daofactory(classname="Jobs.New")
    newJobAction.execute(jobgroup=self["jobgroup"],
                         name=self["name"],
                         couch_record=self["couch_record"],
                         location=self["location"],
                         cache_dir=self['cache_dir'],
                         outcome=self['outcome'],
                         fwjr=self['fwjr'],
                         conn=self.getDBConn(),
                         transaction=self.existingTransaction())

    # NOTE(review): exists() is presumably what fills in self['id'],
    # which the mask save below relies on -- confirm.
    self.exists()

    self['mask'].save(jobID=self['id'])

    self.associateFiles()
    self.associateWorkUnits()
    self.commitTransaction(transactionMarker)
    return
def test05(self):
    """
    _test05_

    Test multi lumis express merges with holes

    Two files each are created for lumis 1, 2 and 4 of run 1; with a
    maxLatency of 1 (after a one second wait) the splitter is expected
    to return one job group containing two jobs.
    """
    splitArgs = self.splitArgs.copy()

    for lumi in [1, 2, 4]:
        for _ in range(2):
            lumiFile = File(makeUUID(), size = 1000, events = 100)
            lumiFile.addRun(Run(1, lumi))
            lumiFile.setLocation("SomePNN", immediateSave = False)
            lumiFile.create()
            self.fileset2.addFile(lumiFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    time.sleep(1)

    splitArgs['maxLatency'] = 1
    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    return
def test05(self):
    """
    _test05_

    Test repacking of multiple lumis with holes in the lumi sequence

    Multi lumi input

    Lumis 1, 2 and 4 have two files each.  No job group is expected
    until lumi 3 is inserted as a closed lumi without files; after that
    one job processing four files is expected.
    """
    splitArgs = self.splitArgs.copy()

    closedLumiBinds = []
    for lumi in [1, 2, 4]:
        filecount = 2
        for _ in range(filecount):
            lumiFile = File(makeUUID(), size = 1000, events = 100)
            lumiFile.addRun(Run(1, lumi))
            lumiFile.setLocation("SomePNN", immediateSave = False)
            lumiFile.create()
            self.fileset1.addFile(lumiFile)
        closedLumiBinds.append({ 'RUN' : 1,
                                 'LUMI' : lumi,
                                 'STREAM' : "A",
                                 'FILECOUNT' : filecount,
                                 'INSERT_TIME' : self.currentTime,
                                 'CLOSE_TIME' : self.currentTime })
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = closedLumiBinds,
                                     transaction = False)

    splitArgs['maxInputFiles'] = 5
    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # Close the hole: lumi 3 is closed with no files in it.
    emptyLumiBind = { 'RUN' : 1,
                      'LUMI' : 3,
                      'STREAM' : "A",
                      'FILECOUNT' : 0,
                      'INSERT_TIME' : self.currentTime,
                      'CLOSE_TIME' : self.currentTime }
    self.insertClosedLumiDAO.execute(binds = emptyLumiBind,
                                     transaction = False)

    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")

    return
def testFileset(self):
    """
    _testFileset_

    Add five files with random size and event count to an ACDC couch
    fileset and check that every file of the converted DataStructs
    fileset carries the LFN, event count and size it was created with.
    """
    couchCollection = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="Thunderstruck")
    couchFileset = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFileset")
    couchCollection.addFileset(couchFileset)

    # Expected files, keyed by LFN
    expectedFiles = {}
    for _ in range(5):
        lfn = makeUUID()
        expectedFile = File(lfn=lfn, size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
        expectedFiles[lfn] = expectedFile
        couchFileset.add([expectedFile])

    for storedFile in couchFileset.fileset().files:
        self.assertTrue(storedFile["lfn"] in expectedFiles,
                        "Error: File missing.")
        expectedFile = expectedFiles[storedFile["lfn"]]
        self.assertEqual(storedFile["events"], expectedFile["events"],
                         "Error: Wrong number of events.")
        self.assertEqual(storedFile["size"], expectedFile["size"],
                         "Error: Wrong file size.")

    return
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    Implement merge algorithm for the subscription provided.

    Files are grouped by location through self.sortByLocation().  One job
    group is opened per location and the location's files are collected
    until their summed size reaches the merge size, at which point they
    become one job.

    Keyword arguments:
      merge_size -- required; accumulated size (converted with int()) at
                    which the collected files are turned into a job
      all_files  -- optional, default False; when true the files of a
                    location that stay below merge_size are put into one
                    additional job instead of being left out
    """
    mergeSize = int(kwargs['merge_size'])
    overflow = bool(kwargs.get('all_files', False))

    locationDict = self.sortByLocation()
    for location in locationDict:
        baseName = makeUUID()
        self.newGroup()

        # Start every location with an empty accumulator so that files
        # left over at one location never end up in a job of another
        # location, and are never submitted a second time.
        accumSize = 0
        jobFiles = Fileset()

        for f in locationDict[location]:
            accumSize += f['size']
            jobFiles.addFile(f)
            if accumSize >= mergeSize:
                self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                            files = jobFiles)
                self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                accumSize = 0
                jobFiles = Fileset()

        # Whatever is left did not reach the merge size.
        if overflow and len(jobFiles) > 0:
            self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                        files = jobFiles)
            self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing.

    nFiles files are built with self.createFile() at 'T1_US_FNAL_Disk';
    with twoSites a second batch of nFiles files at 'T2_CH_CERN' is
    added.  Files, fileset and the "EventAwareLumiBased" Processing
    subscription on self.testWorkflow are all created before the
    subscription is returned.
    """
    baseName = makeUUID()

    fileset = Fileset(name=baseName)
    fileset.create()

    # (LFN pattern, site) for every batch of files to generate
    batches = [('%s_%i', 'T1_US_FNAL_Disk')]
    if twoSites:
        batches.append(('%s_%i_2', 'T2_CH_CERN'))

    for lfnPattern, site in batches:
        for index in range(nFiles):
            wmbsFile = self.createFile(lfnPattern % (baseName, index),
                                       nEventsPerFile, index,
                                       lumisPerFile, site)
            wmbsFile.create()
            fileset.addFile(wmbsFile)
    fileset.commit()

    subscription = Subscription(fileset=fileset,
                                workflow=self.testWorkflow,
                                split_algo="EventAwareLumiBased",
                                type="Processing")
    subscription.create()

    return subscription
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    A time delay job splitting algorithm, will shove all unacquired
    files into a new job if the trigger_time has been passed.

    Keyword arguments:
      trigger_time -- required; converted with int() and compared with
                      the current time.time()

    Returns an empty list when the trigger time has passed but there are
    no available files; otherwise nothing is returned.
    """
    # get the fileset
    self.subscription.getFileset()

    triggerTime = int(kwargs['trigger_time'])
    if triggerTime >= time.time():
        # Not due yet
        return

    pendingFiles = self.subscription.availableFiles()
    if len(pendingFiles) == 0:
        # no files to acquire
        return []

    self.newGroup()
    self.newJob(name = '%s-endofrun' % (makeUUID(),))

    for pendingFile in pendingFiles:
        self.currentJob.addFile(pendingFile)
def test06(self):
    """
    _test06_

    Test repacking of 3 lumis
    2 small lumis (single job), followed by a big one (multiple jobs)

    files for lumi 1 and 2 are below multi-lumi thresholds
    files for lumi 3 are above single-lumi threshold
    """
    splitArgs = self.splitArgs.copy()

    closedLumiBinds = []
    for lumi in [1, 2, 3]:
        filecount = 2
        # Only the files of lumi 3 are big
        nevents = 500 if lumi == 3 else 100
        for _ in range(filecount):
            lumiFile = File(makeUUID(), size = 1000, events = nevents)
            lumiFile.addRun(Run(1, lumi))
            lumiFile.setLocation("SomePNN", immediateSave = False)
            lumiFile.create()
            self.fileset1.addFile(lumiFile)
        closedLumiBinds.append({ 'RUN' : 1,
                                 'LUMI' : lumi,
                                 'STREAM' : "A",
                                 'FILECOUNT' : filecount,
                                 'INSERT_TIME' : self.currentTime,
                                 'CLOSE_TIME' : self.currentTime })
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = closedLumiBinds,
                                     transaction = False)

    splitArgs['maxLatency'] = 50000
    splitArgs['maxInputEvents'] = 900
    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 3,
                     "ERROR: JobFactory didn't create three jobs")
    self.assertEqual(len(jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")
    self.assertEqual(len(jobs[1].getFiles()), 1,
                     "ERROR: second job does not process 1 file")
    self.assertEqual(len(jobs[2].getFiles()), 1,
                     "ERROR: third job does not process 1 file")

    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
    """
    _createSubscription_

    Create a subscription for testing.

    A dedicated workflow, named after the first four characters of a
    fresh UUID, is created first.  nFiles files are built with
    self.createFile() at 'T1_US_FNAL_Disk'; with twoSites a second batch
    of nFiles files at 'T2_CH_CERN' is added.  The fileset is created
    once all files are in, and the created "EventAwareLumiByWork"
    Processing subscription is returned.
    """
    baseName = makeUUID()

    workflow = Workflow(spec="spec.xml", owner="dmwm",
                        name="testWorkflow_%s" % baseName[:4], task="Test")
    workflow.create()

    fileset = Fileset(name=baseName)

    # (LFN pattern, site) for every batch of files to generate
    batches = [('%s_%i', 'T1_US_FNAL_Disk')]
    if twoSites:
        batches.append(('%s_%i_2', 'T2_CH_CERN'))

    for lfnPattern, site in batches:
        for index in range(nFiles):
            fileset.addFile(self.createFile(lfnPattern % (baseName, index),
                                            nEventsPerFile, index,
                                            lumisPerFile, site))
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="EventAwareLumiByWork",
                                type="Processing")
    subscription.create()

    return subscription
def test03(self):
    """
    _test03_

    Test single lumi event threshold

    Single lumi input

    Eight files of 100 events sit in lumi 1.  With only maxLatency
    raised no job group is expected; with maxInputEvents at 650 the lumi
    is expected to be split into a job of six files and a job of two.
    """
    splitArgs = self.splitArgs.copy()

    lumi = 1
    filecount = 8
    for _ in range(filecount):
        lumiFile = File(makeUUID(), size = 1000, events = 100)
        lumiFile.addRun(Run(1, lumi))
        lumiFile.setLocation("SomePNN", immediateSave = False)
        lumiFile.create()
        self.fileset1.addFile(lumiFile)
    closedLumiBinds = [{ 'RUN' : 1,
                         'LUMI' : lumi,
                         'STREAM' : "A",
                         'FILECOUNT' : filecount,
                         'INSERT_TIME' : self.currentTime,
                         'CLOSE_TIME' : self.currentTime }]
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = closedLumiBinds,
                                     transaction = False)

    splitArgs['maxLatency'] = 50000
    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    splitArgs['maxInputEvents'] = 650
    jobGroups = jobFactory(**splitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    firstGroupJobs = jobGroups[0].jobs
    self.assertEqual(len(firstGroupJobs), 2,
                     "ERROR: JobFactory didn't create two jobs")
    self.assertEqual(len(firstGroupJobs[0].getFiles()), 6,
                     "ERROR: Job does not process 6 files")
    self.assertEqual(len(firstGroupJobs[1].getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    self.assertEqual(self.getNumActiveSplitLumis(), 1,
                     "ERROR: Split lumis were not created")

    return
def setUp(self):
    """
    _setUp_

    Install the WMBS schema, register two locations and create three
    filesets, each with its own "SizeBased" Processing subscription:

      multipleFileFileset -- ten files at T2_CH_CERN
      singleFileFileset   -- a single file at T2_CH_CERN
      multipleSiteFileset -- five files at T2_CH_CERN plus five files at
                             both T2_CH_CERN and T1_US_FNAL_Disk
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    # current_thread() is the supported spelling; the camelCase alias
    # currentThread() is deprecated since Python 3.10.
    myThread = threading.current_thread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName="site1", pnn="T2_CH_CERN")
    locationAction.execute(siteName="site2", pnn="T1_US_FNAL_Disk")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    for _ in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations={"T2_CH_CERN"})
        newFile.create()
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations={"T2_CH_CERN"})
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("T2_CH_CERN")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["T2_CH_CERN", "T1_US_FNAL_Disk"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    self.multipleFileSubscription = Subscription(
        fileset=self.multipleFileFileset,
        workflow=testWorkflow,
        split_algo="SizeBased",
        type="Processing")
    self.multipleFileSubscription.create()

    self.singleFileSubscription = Subscription(
        fileset=self.singleFileFileset,
        workflow=testWorkflow,
        split_algo="SizeBased",
        type="Processing")
    self.singleFileSubscription.create()

    self.multipleSiteSubscription = Subscription(
        fileset=self.multipleSiteFileset,
        workflow=testWorkflow,
        split_algo="SizeBased",
        type="Processing")
    self.multipleSiteSubscription.create()

    return
def test10(self):
    """
    _test10_

    Test merging of multiple lumis with holes in the lumi sequence.

    The hole is due to no streamer files existing for the lumi.
    Multi lumi input.
    """
    splitArguments = self.splitArgs.copy()
    splitArguments['maxInputEvents'] = 500

    def closeLumi(lumiNumber, fileCount):
        # Record a closed lumi section of run 1 / stream A.
        self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                                'LUMI': lumiNumber,
                                                'STREAM': "A",
                                                'FILECOUNT': fileCount,
                                                'INSERT_TIME': self.currentTime,
                                                'CLOSE_TIME': self.currentTime},
                                         transaction=False)

    # Two files for each of the lumis 1, 2 and 5 (3 and 4 are the hole).
    for lumiNumber in (1, 2, 5):
        for _ in range(2):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset2.addFile(inputFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    def expectNoJobGroup():
        # Run the splitter and require that it produced nothing.
        produced = jobFactory(**splitArguments)
        self.assertEqual(len(produced), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

    expectNoJobGroup()

    # Lumi 3 is closed without any files.
    closeLumi(3, 0)
    expectNoJobGroup()

    # Lumi 4 is closed with one file expected.
    closeLumi(4, 1)
    self.feedStreamersDAO.execute(transaction=False)
    self.fileset1.loadData()
    expectNoJobGroup()

    # Acquiring the files of the first subscription is not enough ...
    for fileId in self.fileset1.getFiles(type='id'):
        self.acquireFilesDAO.execute(self.subscription1['id'], fileId,
                                     transaction=False)
    expectNoJobGroup()

    # ... but once they are completed a job is produced.
    for fileId in self.fileset1.getFiles(type='id'):
        self.completeFilesDAO.execute(self.subscription1['id'], fileId,
                                      transaction=False)

    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    createdJobs = jobGroups[0].jobs
    self.assertEqual(len(createdJobs), 1,
                     "ERROR: JobFactory didn't create one job")
    self.assertEqual(len(createdJobs[0].getFiles()), 4,
                     "ERROR: Job does not process 4 files")
    return
def testChunking(self):
    """
    _testChunking_

    Insert a workload and files that have several distinct sets of
    locations.  Verify that the chunks are created correctly and that they
    only group files that have the same set of locations.  Also verify
    that the chunks are pulled out of ACDC correctly.
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    def buildFile(locations, runNumber, firstLumi, **fileArgs):
        # One 1024-event file covering two consecutive lumis of a run.
        newFile = File(lfn=makeUUID(), size=1024, events=1024, **fileArgs)
        newFile.setLocation(list(locations))
        newFile.addRun(Run(runNumber, firstLumi, firstLumi + 1))
        return newFile

    def buildJob(jobFiles):
        # Minimal job holding the given input files.
        newJob = self.getMinimalJob()
        for jobFile in jobFiles:
            newJob.addFile(jobFile)
        return newJob

    twoSites = ("cmssrm.fnal.gov", "castor.cern.ch")
    oneSite = ("cmssrm.fnal.gov",)
    threeSites = ("cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk")

    testFileA = buildFile(twoSites, 1, 1)
    testFileB = buildFile(twoSites, 1, 3)
    testFileC = buildFile(twoSites, 1, 5)
    testJobA = buildJob([testFileA, testFileB, testFileC])

    testFileD = buildFile(oneSite, 2, 1)
    testFileE = buildFile(oneSite, 2, 3)
    testJobB = buildJob([testFileD, testFileE])

    testFileF = buildFile(threeSites, 3, 1, parents={"/some/parent/F"})
    testFileG = buildFile(threeSites, 3, 3, parents={"/some/parent/G"})
    testFileH = buildFile(threeSites, 3, 5, parents={"/some/parent/H"})
    testJobC = buildJob([testFileF, testFileG, testFileH])

    testFileI = buildFile(twoSites, 4, 1, merged=True)
    testFileJ = buildFile(twoSites, 4, 3, merged=True)
    testFileK = buildFile(twoSites, 4, 5, merged=True)
    testJobD = buildJob([testFileI, testFileJ, testFileK])

    dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
    chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)

    self.assertEqual(len(chunks), 4,
                     "Error: There should be four chunks: %s" % len(chunks))

    # Expected metadata, keyed by the number of files in the chunk.
    goldenMetaData = {
        1: {"lumis": 2,
            "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
            "events": 1024},
        2: {"lumis": 4,
            "locations": ["cmssrm.fnal.gov"],
            "events": 2048},
        3: {"lumis": 6,
            "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
            "events": 3072},
        5: {"lumis": 10,
            "locations": ["castor.cern.ch", "cmssrm.fnal.gov"],
            "events": 5120}}

    # Six files share the two-site location set; with chunkSize=5 the
    # expectation encoded here is that the file with the greatest LFN
    # lands alone in the one-file chunk.
    testFiles = [testFileA, testFileB, testFileC,
                 testFileI, testFileJ, testFileK]
    lastFile = max(testFiles, key=lambda candidate: candidate["lfn"])
    testFiles.remove(lastFile)

    goldenFiles = {1: [lastFile],
                   2: [testFileD, testFileE],
                   3: [testFileF, testFileG, testFileH],
                   5: testFiles}

    for chunk in chunks:
        fileCount = chunk["files"]

        chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                         chunk["offset"], fileCount)
        self.assertEqual(chunkMetaData["files"], chunk["files"])
        self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
        self.assertEqual(chunkMetaData["events"], chunk["events"])
        self.assertEqual(chunkMetaData["locations"], chunk["locations"])

        self.assertTrue(fileCount in goldenMetaData,
                        "Error: Extra chunk found.")
        expectedMeta = goldenMetaData[fileCount]
        self.assertEqual(chunk["lumis"], expectedMeta["lumis"],
                         "Error: Lumis in chunk is wrong.")
        self.assertEqual(chunk["locations"], expectedMeta["locations"],
                         "Error: Locations in chunk is wrong.")
        self.assertEqual(chunk["events"], expectedMeta["events"],
                         "Error: Events in chunk is wrong.")
        del goldenMetaData[fileCount]

        chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco",
                                       chunk["offset"], fileCount)

        self.assertTrue(fileCount in goldenFiles,
                        "Error: Extra chunk found.")
        goldenChunkFiles = goldenFiles[fileCount]
        self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

        for chunkFile in chunkFiles:
            # Pair the returned file with its golden counterpart by LFN.
            foundFile = next((golden for golden in goldenChunkFiles
                              if chunkFile["lfn"] == golden["lfn"]), None)

            self.assertIsNotNone(foundFile,
                                 "Error: Missing chunk file: %s, %s"
                                 % (chunkFiles, goldenChunkFiles))
            self.assertEqual(foundFile["parents"], chunkFile["parents"],
                             "Error: File parents should match.")
            self.assertEqual(foundFile["merged"], chunkFile["merged"],
                             "Error: File merged status should match.")
            self.assertEqual(foundFile["locations"], chunkFile["locations"],
                             "Error: File locations should match.")
            self.assertEqual(foundFile["events"], chunkFile["events"])
            self.assertEqual(foundFile["size"], chunkFile["size"])
            self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]),
                             "Error: Wrong number of runs.")

            for run in foundFile["runs"]:
                runMatch = any(chunkRun.run == run.run and
                               chunkRun.lumis == run.lumis
                               for chunkRun in chunkFile["runs"])
                self.assertTrue(runMatch, "Error: Run information is wrong.")

        del goldenFiles[fileCount]

    singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(singleChunk,
                     {"offset": 0,
                      "files": 11,
                      "events": 11264,
                      "lumis": 22,
                      "locations": {"castor.cern.ch", "cmssrm.fnal.gov",
                                    "srm.ral.uk"}},
                     "Error: Single chunk metadata is wrong")
    return
def createFilesWithChildren(self, moreParentFiles, acqEra):
    """
    _createFilesWithChildren_

    Create ten parentless RAW files and then five RECO child files.

    Every child gets two of the freshly created RAW files as parents and,
    in addition, two files taken from moreParentFiles (which therefore has
    to hold at least ten files).  Returns the tuple
    (parentFiles, childFiles).
    """
    def makeBufferFile(lfnPrefix, appFamily, datasetPath, lumiSections):
        # Build, configure and persist one DBSBuffer file located at
        # "malpaquet"; the statement order mirrors the original set-up.
        bufferFile = DBSBufferFile(lfn=lfnPrefix + makeUUID() + ".root",
                                   size=1024, events=20,
                                   checksums={"cksum": 1})
        bufferFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                appFam=appFamily, psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
        bufferFile.setDatasetPath(datasetPath)

        bufferFile['block_close_max_wait_time'] = 1000000
        bufferFile['block_close_max_events'] = 1000000
        bufferFile['block_close_max_size'] = 1000000
        bufferFile['block_close_max_files'] = 1000000

        bufferFile.addRun(Run(143316, *lumiSections))

        bufferFile.setAcquisitionEra(acqEra)
        bufferFile.setProcessingVer("1")
        bufferFile.setGlobalTag("START54::All")
        bufferFile.create()
        bufferFile.setLocation("malpaquet")
        return bufferFile

    parentFiles = []
    rawPrefix = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % acqEra
    for index in range(10):
        # Ten consecutive lumis per RAW file.
        rawLumis = [(index * 10) + offset for offset in range(10)]
        parentFiles.append(makeBufferFile(rawPrefix, "RAW",
                                          "/Cosmics/%s-v1/RAW" % acqEra,
                                          rawLumis))

    childFiles = []
    recoPrefix = "/store/data/%s/Cosmics/RECO/v1/000/143/316/" % acqEra
    for index in range(5):
        # Twenty consecutive lumis per RECO file.
        recoLumis = [(index * 20) + offset for offset in range(20)]
        childFile = makeBufferFile(recoPrefix, "RECO",
                                   "/Cosmics/%s-v1/RECO" % acqEra,
                                   recoLumis)

        first, second = index * 2, index * 2 + 1
        childFile.addParents([parentFiles[first]["lfn"],
                              parentFiles[second]["lfn"]])
        childFile.addParents([moreParentFiles[first]["lfn"],
                              moreParentFiles[second]["lfn"]])
        childFiles.append(childFile)

    return parentFiles, childFiles
def testListRunningJobs(self):
    """
    _testListRunningJobs_

    Test the ListRunningJobs DAO.

    Three jobs are created in the states "executing", "complete" and
    "new".  The DAO is expected to return exactly two of them: the
    executing one and the new one, each with matching state and couch
    record.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="Steve",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing")
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJobA = Job(name=makeUUID(), files=[])
    testJobA["couch_record"] = makeUUID()
    testJobA.create(group=testJobGroup)
    testJobA["state"] = "executing"

    testJobB = Job(name=makeUUID(), files=[])
    testJobB["couch_record"] = makeUUID()
    testJobB.create(group=testJobGroup)
    testJobB["state"] = "complete"

    testJobC = Job(name=makeUUID(), files=[])
    testJobC["couch_record"] = makeUUID()
    testJobC.create(group=testJobGroup)
    testJobC["state"] = "new"

    # Persist the states assigned above.
    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC])

    runningJobsAction = self.daoFactory(classname="Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()

    # unittest assertions are used instead of bare ``assert`` statements:
    # they are not stripped under ``python -O`` and report both values.
    self.assertEqual(len(runningJobs), 2,
                     "Error: Wrong number of running jobs returned.")

    for runningJob in runningJobs:
        if runningJob["job_name"] == testJobA["name"]:
            expectedJob = testJobA
        else:
            # Anything that is not job A has to be job C.
            self.assertEqual(runningJob["job_name"], testJobC["name"],
                             "Error: Running job has wrong name.")
            expectedJob = testJobC

        self.assertEqual(runningJob["state"], expectedJob["state"],
                         "Error: Running job has wrong state.")
        self.assertEqual(runningJob["couch_record"],
                         expectedJob["couch_record"],
                         "Error: Running job has wrong couch record.")

    return
def test03(self):
    """
    _test03_

    Test the single lumi event threshold.

    Single lumi input: eight 100-event files in lumi 1.  Nothing is
    produced with the default arguments; with maxInputEvents set to 650
    the lumi is split into a six-file job and a two-file job.
    """
    splitArguments = self.splitArgs.copy()

    closedLumiBinds = []
    for lumiNumber in (1,):
        fileCount = 8
        for _ in range(fileCount):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
        closedLumiBinds.append({'RUN': 1,
                                'LUMI': lumiNumber,
                                'STREAM': "A",
                                'FILECOUNT': fileCount,
                                'INSERT_TIME': self.currentTime,
                                'CLOSE_TIME': self.currentTime})
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription1)

    self.insertClosedLumiDAO.execute(binds=closedLumiBinds,
                                     transaction=False)

    # Default thresholds: no job group expected.
    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    splitArguments['maxInputEvents'] = 650
    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    createdJobs = jobGroups[0].jobs
    self.assertEqual(len(createdJobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    firstJob = createdJobs[0]
    self.assertEqual(len(firstJob.getFiles()), 6,
                     "ERROR: Job does not process 6 files")

    secondJob = createdJobs[1]
    self.assertEqual(len(secondJob.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    self.assertEqual(self.getNumActiveSplitLumis(), 1,
                     "ERROR: Split lumis were not created")
    return
def algorithm(self, *args, **kwargs):
    """
    _algorithm_

    Run the discovery query and generate jobs if we find enough files.

    The files returned by the Subscriptions.SiblingSubscriptionsComplete
    DAO are de-duplicated by LFN (first occurrence wins) and grouped by
    their "pnn".  For every pnn that holds at least ``files_per_job``
    files - or any files at all once the fileset is closed - a new job
    group is started and jobs of ``files_per_job`` files are created.  A
    trailing partial job is only created when the fileset is closed.

    Keyword arguments:
      files_per_job -- number of files per job (default 10); must be >= 1.

    Raises ValueError when files_per_job is smaller than 1, since the
    slicing loop below could otherwise never terminate.
    """
    # This doesn't use a proxy
    self.grabByProxy = False

    filesPerJob = int(kwargs.get("files_per_job", 10))
    if filesPerJob < 1:
        raise ValueError("files_per_job must be at least 1, got %s" % filesPerJob)

    # current_thread() replaces the deprecated currentThread() alias.
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    fileAvail = daoFactory(classname="Subscriptions.SiblingSubscriptionsComplete")
    completeFiles = fileAvail.execute(self.subscription["id"],
                                      conn=myThread.transaction.conn,
                                      transaction=True)

    self.subscription["fileset"].load()
    # The explicit comparison is kept on purpose so that values of ``open``
    # that are truthy but not equal to True are treated as before.
    if self.subscription["fileset"].open == True:
        filesetClosed = False
    else:
        fileFailed = daoFactory(classname="Subscriptions.SiblingSubscriptionsFailed")
        fileFailed.execute(self.subscription["id"],
                           self.subscription["fileset"].id,
                           conn=myThread.transaction.conn,
                           transaction=True)
        filesetClosed = True

    # Group the files by pnn, keeping only the first row seen per LFN.
    # A set gives O(1) membership tests instead of scanning a list.
    fileSites = {}
    seenLFNs = set()
    for completeFile in completeFiles:
        if completeFile["lfn"] in seenLFNs:
            continue
        seenLFNs.add(completeFile["lfn"])
        fileSites.setdefault(completeFile["pnn"], []).append(completeFile)

    def addJob(jobFiles):
        # Start a new job and attach the given file rows to it.
        self.newJob(name=makeUUID())
        for jobFile in jobFiles:
            newFile = File(id=jobFile["id"], lfn=jobFile["lfn"],
                           events=jobFile["events"])
            newFile["locations"] = set([jobFile["pnn"]])
            self.currentJob.addFile(newFile)

    for siteName in fileSites:
        siteFiles = fileSites[siteName]
        if len(siteFiles) < filesPerJob and not filesetClosed:
            # Not enough files yet and more may still arrive.
            continue

        self.newGroup()
        while len(siteFiles) >= filesPerJob:
            addJob(siteFiles[:filesPerJob])
            siteFiles = siteFiles[filesPerJob:]

        # Left-overs are only flushed when no more files can arrive.
        if filesetClosed and len(siteFiles) > 0:
            addJob(siteFiles)

    return
def populateWMBS(self):
    """
    _populateWMBS_

    Create files and subscriptions in WMBS.

    Two locations are registered and stored in self.validLocations, and
    three "EventBased" Processing subscriptions sharing one workflow are
    created:

      * multipleFileSubscription - ten files at T1_US_FNAL_Disk, all
        children of the single parent file '/parent/lfn/'
      * singleFileSubscription   - one file at T1_US_FNAL_Disk
      * multipleSiteSubscription - five files at T1_US_FNAL_Disk plus
        five files at both T1_US_FNAL_Disk and T2_CH_CERN
    """
    # current_thread() replaces the deprecated currentThread() alias.
    myThread = threading.current_thread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName='s1', pnn="T1_US_FNAL_Disk")
    locationAction.execute(siteName='s2', pnn="T2_CH_CERN")
    self.validLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    # Ten child files of one common parent, all at a single location.
    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    parentFile = File('/parent/lfn/', size=1000, events=100,
                      locations=set(["T1_US_FNAL_Disk"]))
    parentFile.create()
    for _ in range(10):
        newFile = File(makeUUID(), size=1000, events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        newFile.addParent(lfn=parentFile['lfn'])
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()
    self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="EventBased",
                                                 type="Processing")
    self.multipleFileSubscription.create()

    # A fileset holding exactly one file.
    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100,
                   locations=set(["T1_US_FNAL_Disk"]))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()
    self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                               workflow=testWorkflow,
                                               split_algo="EventBased",
                                               type="Processing")
    self.singleFileSubscription.create()

    # A fileset whose files have two different sets of locations.
    self.multipleSiteFileset = Fileset(name="TestFileset3")
    self.multipleSiteFileset.create()
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation("T1_US_FNAL_Disk")
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    for _ in range(5):
        newFile = File(makeUUID(), size=1000, events=100)
        newFile.setLocation(["T1_US_FNAL_Disk", "T2_CH_CERN"])
        newFile.create()
        self.multipleSiteFileset.addFile(newFile)
    self.multipleSiteFileset.commit()
    self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="EventBased",
                                                 type="Processing")
    self.multipleSiteSubscription.create()

    return
def testAutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Test and see whether we can find and set the auto_increment values.

    The test only does anything for the MySQL dialect; for any other
    dialect it returns immediately.  The Jobs.AutoIncrementCheck DAO is
    executed four times (twice without arguments, then with input=10 and
    input=5); after each call a new job is created and the value returned
    by Job.exists() is compared with 1, 2, 11 and 12 respectively.
    """
    # current_thread() replaces the deprecated currentThread() alias.
    myThread = threading.current_thread()
    if myThread.dialect.lower() != 'mysql':
        return

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    testWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="test.site.ch")
    testFileB = File(lfn=makeUUID(), locations="test.site.ch")
    testFileA.create()
    testFileB.create()

    testFileset.addFile([testFileA, testFileB])
    testFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB])

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")

    # (DAO keyword arguments, value expected from exists() afterwards).
    # The last step shows that an input below the current counter does
    # not move the counter backwards.
    steps = [({}, 1),
             ({}, 2),
             ({"input": 10}, 11),
             ({"input": 5}, 12)]
    for daoArgs, expectedValue in steps:
        incrementDAO.execute(**daoArgs)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), expectedValue)

    return
def createTestJobGroup(self, name="TestWorkthrough", specLocation="spec.xml",
                       error=False, task="/TestWorkload/ReReco", nJobs=10):
    """
    _createTestJobGroup_

    Generate a test WMBS JobGroup with real FWJRs.

    Arguments:
      name         -- used for both the workflow and the fileset
      specLocation -- spec passed to the workflow
      error        -- when true the "badBackfillJobReport.pkl" report is
                      attached to the jobs, otherwise
                      "PerformanceReport2.pkl"
      task         -- task of the workflow
      nJobs        -- number of jobs added to the group

    Every job uses the same two input files and is pushed through the
    state chain new -> created -> executing -> complete -> jobfailed ->
    retrydone -> exhausted -> cleanout.  Returns the JobGroup.
    """
    testWorkflow = Workflow(spec=specLocation, owner="Simon",
                            name=name, task=task)
    testWorkflow.create()

    testWMBSFileset = Fileset(name=name)
    testWMBSFileset.create()

    testFileA = File(lfn=makeUUID(), size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn=makeUUID(), size=1024, events=10)
    testFileB.addRun(Run(10, *[12312]))
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    testWMBSFileset.addFile(testFileA)
    testWMBSFileset.addFile(testFileB)
    testWMBSFileset.commit()
    testWMBSFileset.markOpen(0)

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    for _ in range(nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
        testJobGroup.add(testJob)

    testJobGroup.commit()

    # Only the file name of the pickled report depends on ``error``.
    if error:
        reportName = "badBackfillJobReport.pkl"
    else:
        reportName = "PerformanceReport2.pkl"
    path = os.path.join(WMCore.WMBase.getTestBase(),
                        "WMComponent_t/JobAccountant_t/fwjrs",
                        reportName)
    report = Report()
    report.load(filename=path)

    self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
    self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
    self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
    # The report is attached before the jobs move to 'jobfailed'.
    for job in testJobGroup.jobs:
        job['fwjr'] = report
    self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
    self.changeState.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
    self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
    self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

    testSubscription.completeFiles([testFileA, testFileB])

    return testJobGroup
def createJobs(self):
    """
    _createJobs_

    Create test jobs in WMBS and BossAir.

    One file is placed in three filesets, each with its own subscription
    (two Processing subscriptions that white-list / black-list
    "testSite1", and one Merge subscription).  Ten jobs named testJobA to
    testJobJ are spread over the three job groups in various states, two
    of them get a BossAir RunJob, and six get a location assigned.
    """
    testWorkflow = Workflow(spec=makeUUID(), owner="tapas",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFilesetA = Fileset(name="TestFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name="TestFilesetB")
    testFilesetB.create()
    testFilesetC = Fileset(name="TestFilesetC")
    testFilesetC.create()

    testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    testFileA.create()

    # The same file is an input of all three filesets.
    for fileset in (testFilesetA, testFilesetB, testFilesetC):
        fileset.addFile(testFileA)
        fileset.commit()

    testSubscriptionA = Subscription(fileset=testFilesetA,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionA.create()
    testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1", "valid": True}])

    testSubscriptionB = Subscription(fileset=testFilesetB,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionB.create()
    testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1", "valid": False}])

    testSubscriptionC = Subscription(fileset=testFilesetC,
                                     workflow=testWorkflow,
                                     type="Merge")
    testSubscriptionC.create()

    testJobGroupA = JobGroup(subscription=testSubscriptionA)
    testJobGroupA.create()
    testJobGroupB = JobGroup(subscription=testSubscriptionB)
    testJobGroupB.create()
    testJobGroupC = JobGroup(subscription=testSubscriptionC)
    testJobGroupC.create()

    def buildJob(jobName, jobGroup, jobState, **preCreateFields):
        # Create a job over testFileA in the given group; extra fields
        # are set before the job is created, the state afterwards.
        newJob = Job(name=jobName, files=[testFileA])
        newJob["couch_record"] = makeUUID()
        for fieldName, fieldValue in preCreateFields.items():
            newJob[fieldName] = fieldValue
        newJob.create(group=jobGroup)
        newJob["state"] = jobState
        return newJob

    def buildRunJob(wmbsJob, status):
        # BossAir view of a WMBS job with the given scheduler status.
        newRunJob = RunJob()
        newRunJob.buildFromJob(wmbsJob)
        newRunJob["status"] = status
        return newRunJob

    # Jobs of the white-listed Processing subscription.
    testJobA = buildJob("testJobA", testJobGroupA, "success")
    testJobB = buildJob("testJobB", testJobGroupA, "executing",
                        cache_dir=self.tempDir)
    runJobB = buildRunJob(testJobB, "PEND")
    testJobC = buildJob("testJobC", testJobGroupA, "new")

    # Jobs of the black-listed Processing subscription.
    testJobD = buildJob("testJobD", testJobGroupB, "success")
    testJobE = buildJob("testJobE", testJobGroupB, "executing")
    runJobE = buildRunJob(testJobE, "RUN")
    testJobF = buildJob("testJobF", testJobGroupB, "new")

    # Jobs of the Merge subscription.
    testJobG = buildJob("testJobG", testJobGroupC, "cleanout")
    testJobH = buildJob("testJobH", testJobGroupC, "new")
    testJobI = buildJob("testJobI", testJobGroupC, "new")
    testJobJ = buildJob("testJobJ", testJobGroupC, "cleanout")

    # Persist the states assigned above.
    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC, testJobD,
                                    testJobE, testJobF, testJobG, testJobH,
                                    testJobI, testJobJ])

    self.insertRunJob.execute([runJobB, runJobE])

    # Jobs C, F, I and J are left without a location.
    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    jobLocations = [(testJobA, "testSite1"),
                    (testJobB, "testSite1"),
                    (testJobD, "testSite1"),
                    (testJobE, "testSite2"),
                    (testJobG, "testSite1"),
                    (testJobH, "testSite1")]
    for locatedJob, siteName in jobLocations:
        setLocationAction.execute(locatedJob["id"], siteName)

    return
def test06(self):
    """
    _test06_

    Test repacking of 3 lumis.

    2 small lumis (single job), followed by a big one (multiple jobs):
    files for lumi 1 and 2 are below the multi-lumi thresholds,
    files for lumi 3 are above the single-lumi threshold.
    """
    splitArguments = self.splitArgs.copy()

    closedLumiBinds = []
    for lumiNumber in (1, 2, 3):
        fileCount = 2
        # Only the files of lumi 3 are big.
        eventsPerFile = 500 if lumiNumber == 3 else 100
        for _ in range(fileCount):
            inputFile = File(makeUUID(), size=1000, events=eventsPerFile)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
        closedLumiBinds.append({'RUN': 1,
                                'LUMI': lumiNumber,
                                'STREAM': "A",
                                'FILECOUNT': fileCount,
                                'INSERT_TIME': self.currentTime,
                                'CLOSE_TIME': self.currentTime})
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription1)

    self.insertClosedLumiDAO.execute(binds=closedLumiBinds,
                                     transaction=False)

    splitArguments['maxInputEvents'] = 900
    jobGroups = jobFactory(**splitArguments)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    createdJobs = jobGroups[0].jobs
    self.assertEqual(len(createdJobs), 3,
                     "ERROR: JobFactory didn't create three jobs")

    self.assertEqual(len(createdJobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")
    self.assertEqual(len(createdJobs[1].getFiles()), 1,
                     "ERROR: second job does not process 1 file")
    self.assertEqual(len(createdJobs[2].getFiles()), 1,
                     "ERROR: third job does not process 1 file")
    return
def test00(self):
    """
    _test00_

    Test that the job name prefix feature works.

    Test the multi lumi size threshold.
    Multi lumi input.
    """
    splitArguments = self.splitArgs.copy()

    for lumiNumber in (1, 2, 3, 4):
        fileCount = 2
        for _ in range(fileCount):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription1)

    def checkSingleRepackJob(producedGroups):
        # Exactly one group with one four-file job named "Repack-...".
        self.assertEqual(len(producedGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(producedGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")
        onlyJob = producedGroups[0].jobs[0]
        self.assertTrue(onlyJob['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")
        self.assertEqual(len(onlyJob.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

    # With the configured threshold nothing is produced.
    splitArguments['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # A 5000 byte threshold yields a first job ...
    splitArguments['maxSizeMultiLumi'] = 5000
    checkSingleRepackJob(jobFactory(**splitArguments))

    # ... and closing the fileset yields a second one.
    self.fileset1.markOpen(False)
    checkSingleRepackJob(jobFactory(**splitArguments))

    self.assertEqual(self.getNumActiveSplitLumis(), 0,
                     "ERROR: Split lumis were created")
    return
def test05(self):
    """
    _test05_

    Test repacking of multiple lumis with holes in the lumi sequence.

    Multi lumi input: files exist for lumis 1, 2 and 4.  No job is
    produced until lumi 3 is recorded as closed with zero files.
    """
    splitArguments = self.splitArgs.copy()

    def closedLumi(lumiNumber, fileCount):
        # Bind dictionary describing a closed lumi of run 1 / stream A.
        return {'RUN': 1,
                'LUMI': lumiNumber,
                'STREAM': "A",
                'FILECOUNT': fileCount,
                'INSERT_TIME': self.currentTime,
                'CLOSE_TIME': self.currentTime}

    closedLumiBinds = []
    for lumiNumber in (1, 2, 4):
        fileCount = 2
        for _ in range(fileCount):
            inputFile = File(makeUUID(), size=1000, events=100)
            inputFile.addRun(Run(1, lumiNumber))
            inputFile.setLocation("SomePNN", immediateSave=False)
            inputFile.create()
            self.fileset1.addFile(inputFile)
        closedLumiBinds.append(closedLumi(lumiNumber, fileCount))
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription1)

    self.insertClosedLumiDAO.execute(binds=closedLumiBinds,
                                     transaction=False)

    splitArguments['maxInputFiles'] = 5
    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # Fill the hole: lumi 3 is closed and empty.
    self.insertClosedLumiDAO.execute(binds=closedLumi(3, 0),
                                     transaction=False)

    jobGroups = jobFactory(**splitArguments)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    createdJobs = jobGroups[0].jobs
    self.assertEqual(len(createdJobs), 1,
                     "ERROR: JobFactory didn't create one job")
    self.assertEqual(len(createdJobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")
    return
def testDualUpload(self):
    """
    _testDualUpload_

    Verify that the dual upload mode works correctly.

    Four parent blocks of up to five files each are registered (the first
    two flagged "InDBS", the others left "Open"), followed by one child
    block. The uploader is then cycled and the data visible for the
    parent and child datasets is verified after each stage.
    """
    self.dbsApi = DbsApi(url=self.dbsUrl)
    uploader = DBSUploadPoller(config=self.getConfig())
    bufferUtil = DBSBufferUtil()

    # First test verifies that uploader will poll and then not do anything
    # as the database is empty.
    uploader.algorithm()

    acqEra = "Summer%s" % (int(time.time()))
    parentFiles = self.createParentFiles(acqEra)
    moreParentFiles, childFiles = self.createFilesWithChildren(parentFiles,
                                                               acqEra)

    allParentFiles = parentFiles + moreParentFiles
    parentBlocks = []
    for blockIndex in range(4):
        DBSBufferDataset(parentFiles[0]["datasetPath"]).create()
        blockName = parentFiles[0]["datasetPath"] + "#" + makeUUID()
        parentBlock = DBSBufferBlock(blockName,
                                     location="malpaquet",
                                     datasetpath=None)
        parentBlock.status = "Open"
        parentBlock.setDataset(parentFiles[0]["datasetPath"], 'data', 'VALID')
        bufferUtil.createBlocks([parentBlock])

        firstFile = blockIndex * 5
        for fileObj in allParentFiles[firstFile:firstFile + 5]:
            parentBlock.addFile(fileObj, 'data', 'VALID')
            bufferUtil.setBlockFiles({"block": blockName,
                                      "filelfn": fileObj["lfn"]})
            # Only the first two blocks are flagged as already in DBS
            if blockIndex < 2:
                parentBlock.status = "InDBS"
            bufferUtil.updateBlocks([parentBlock])

        bufferUtil.updateFileStatus([parentBlock], "InDBS")
        parentBlocks.append(parentBlock)

    DBSBufferDataset(childFiles[0]["datasetPath"]).create()
    blockName = childFiles[0]["datasetPath"] + "#" + makeUUID()
    childBlock = DBSBufferBlock(blockName,
                                location="malpaquet",
                                datasetpath=None)
    childBlock.status = "InDBS"
    childBlock.setDataset(childFiles[0]["datasetPath"], 'data', 'VALID')
    bufferUtil.createBlocks([childBlock])
    for fileObj in childFiles:
        childBlock.addFile(fileObj, 'data', 'VALID')
        bufferUtil.setBlockFiles({"block": blockName,
                                  "filelfn": fileObj["lfn"]})

    bufferUtil.updateFileStatus([childBlock], "InDBS")

    for _ in range(2):
        uploader.algorithm()
        time.sleep(5)

    self.verifyData(parentFiles[0]["datasetPath"], parentFiles)

    # Change the status of the rest of the parent blocks so we can upload
    # them and the children.
    for parentBlock in parentBlocks:
        parentBlock.status = "InDBS"
        bufferUtil.updateBlocks([parentBlock])

    uploader.algorithm()
    time.sleep(5)

    self.verifyData(parentFiles[0]["datasetPath"],
                    parentFiles + moreParentFiles)

    # Run the uploader one more time to upload the children.
    uploader.algorithm()
    time.sleep(5)

    self.verifyData(childFiles[0]["datasetPath"], childFiles)
    return
def setUp(self):
    """
    _setUp_

    Build four in-memory filesets and a "RunBased" Processing subscription
    for each of them, all sharing one workflow:

      - multipleFileFileset: ten files, file i in run i with lumi 45 + i
      - singleFileFileset:   one file in run 1, lumi 45
      - multipleFileRunset:  ten files, file i in run i // 3, lumi 45
      - singleRunFileset:    ten files, all in run 1, lumi 45
    """
    def makeFile(lfn, runNumber, lumi):
        """Create a 1000 byte, 100 event file located at somese.cern.ch."""
        newFile = File(lfn, size=1000, events=100,
                       locations={"somese.cern.ch"})
        newFile.addRun(Run(runNumber, lumi))
        return newFile

    self.multipleFileFileset = Fileset(name="TestFileset1")
    for index in range(10):
        self.multipleFileFileset.addFile(
            makeFile(makeUUID(), index, 45 + index))

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.addFile(makeFile("/some/file/name", 1, 45))

    self.multipleFileRunset = Fileset(name="TestFileset3")
    for index in range(10):
        self.multipleFileRunset.addFile(
            makeFile(makeUUID(), index // 3, 45))

    self.singleRunFileset = Fileset(name="TestFileset4")
    for _ in range(10):
        self.singleRunFileset.addFile(makeFile(makeUUID(), 1, 45))

    sharedWorkflow = Workflow()

    def runBasedSubscription(fileset):
        """Subscribe the shared workflow to the given fileset."""
        return Subscription(fileset=fileset,
                            workflow=sharedWorkflow,
                            split_algo="RunBased",
                            type="Processing")

    self.multipleFileSubscription = runBasedSubscription(self.multipleFileFileset)
    self.singleFileSubscription = runBasedSubscription(self.singleFileFileset)
    self.multipleRunSubscription = runBasedSubscription(self.multipleFileRunset)
    self.singleRunSubscription = runBasedSubscription(self.singleRunFileset)

    return
def __call__(self, wmbsJob):
    """
    Set the 'id' and 'name' entries of wmbsJob to "<task path name>/<uuid>".

    A separate UUID is generated for each of the two entries.
    """
    for field in ('id', 'name'):
        taskPath = self.task.getPathName()
        wmbsJob[field] = "%s/%s" % (taskPath, makeUUID())
def testC_ACDCTest(self):
    """
    _ACDCTest_

    Test whether we can get a goodRunList out of ACDC and process it
    correctly.

    Nine jobs over ten files are built, three of them are reported to the
    data collection service as failed, and a LumiBased subscription over
    all ten files is then split using that ACDC collection.
    """
    workload = self.createTestWorkload()
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database=self.testInit.couchDbName)

    # One row per job (A..I); each row lists (run, lumis) for every input
    # file of that job, in creation order (files A..J).
    jobLayout = [
        [(1, [1, 2]), (1, [3])],   # job A: files A and B
        [(1, [4, 6])],             # job B: file C
        [(1, [7])],                # job C: file D
        [(1, [11, 12])],           # job D: file E
        [(2, [5, 6, 7])],          # job E: file F
        [(2, [10, 11, 12])],       # job F: file G
        [(2, [15])],               # job G: file H
        [(3, [20])],               # job H: file I
        [(1, [9])],                # job I: file J
    ]

    allFiles = []
    allJobs = []
    for fileSpecs in jobLayout:
        jobFiles = []
        for runNumber, lumis in fileSpecs:
            wmbsFile = File(lfn=makeUUID(), size=1024, events=1024,
                            locations="T1_US_FNAL_Disk")
            wmbsFile.addRun(Run(runNumber, *lumis))
            wmbsFile.create()
            jobFiles.append(wmbsFile)

        acdcJob = getJob(workload)
        for wmbsFile in jobFiles:
            acdcJob.addFile(wmbsFile)

        allFiles.extend(jobFiles)
        allJobs.append(acdcJob)

    # Only jobs A, D and H are reported as failed
    dcs.failedJobs([allJobs[0], allJobs[3], allJobs[7]])

    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    for wmbsFile in allFiles:
        testFileset.addFile(wmbsFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                   subscription=testSubscription)

    jobGroups = jobFactory(lumis_per_job=100,
                           halt_job_on_file_boundaries=False,
                           splitOnRun=True,
                           collectionName=workload.name(),
                           filesetName=workload.getTask("reco").getPathName(),
                           owner="evansde77",
                           group="DMWM",
                           couchURL=self.testInit.couchUrl,
                           couchDB=self.testInit.couchDbName,
                           performance=self.performanceParams)

    createdJobs = jobGroups[0].jobs
    self.assertEqual(createdJobs[0]['mask'].getRunAndLumis(),
                     {1: [[1, 2], [3, 3], [11, 12]]})
    self.assertEqual(createdJobs[1]['mask'].getRunAndLumis(),
                     {3: [[20, 20]]})

    return
def createTestJobGroup(self, nJobs=10, retry_count=1,
                       workloadPath='test', fwjrPath=None,
                       workloadName=None,
                       fileModifier=''):
    """
    Creates a group of several jobs

    A workflow, the "TestFileset" fileset, a subscription and a job group
    are created, together with one parent file and two child files
    (lfnA, lfnB) located at T2_CH_CERN. Every job gets its own cache
    directory under self.testDir and both child files as input.

    :param nJobs: number of jobs to create in the group
    :param retry_count: value stored as 'retry_count' on every job
    :param workloadPath: used as the workflow spec
    :param fwjrPath: value stored as 'fwjr_path' on every job
    :param workloadName: workflow name; when None (the default) a fresh
        UUID is generated for this call
    :param fileModifier: suffix appended to the three test LFNs
    :return: the committed JobGroup
    """
    # A default of makeUUID() in the signature would be evaluated only
    # once, at definition time, and then shared by every call.
    if workloadName is None:
        workloadName = makeUUID()

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops",
                            name=workloadName, task="/TestWorkload/ReReco")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    parentLFN = "/this/is/a/parent%s" % fileModifier

    testFile0 = File(lfn=parentLFN, size=1024, events=10)
    testFile0.addRun(Run(10, *[12312]))
    testFile0.setLocation('T2_CH_CERN')

    testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10,
                     first_event=88)
    testFileA.addRun(Run(10, *[12312, 12313]))
    testFileA.setLocation('T2_CH_CERN')

    testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10,
                     first_event=88)
    testFileB.addRun(Run(10, *[12314, 12315, 12316]))
    testFileB.setLocation('T2_CH_CERN')

    testFile0.create()
    testFileA.create()
    testFileB.create()

    testFileA.addParent(lfn=parentLFN)
    testFileB.addParent(lfn=parentLFN)

    for _ in range(0, nJobs):
        testJob = Job(name=makeUUID())
        testJob['retry_count'] = retry_count
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
        testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
        testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
        testJob['fwjr_path'] = fwjrPath
        os.mkdir(testJob['cache_dir'])
        testJobGroup.add(testJob)
        testJob.create(group=testJobGroup)
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob.save()

    testJobGroup.commit()

    testSubscription.acquireFiles(files=[testFileA, testFileB])
    testSubscription.save()
    myThread.transaction.commit()

    return testJobGroup
def testCompleteJobInput(self):
    """
    _testCompleteJobInput_

    Verify the correct output of the CompleteInput DAO.  This should mark
    the input for a job as complete once all the jobs that run over a
    particular file have completed successfully.

    A second ("bogus") workflow/fileset/subscription over the same files
    is used to check that it is left untouched.
    """
    def checkFileCounts(subscription, label, expectations):
        """
        Check the number of files per status on one subscription.

        expectations is a sequence of (status, word used in the failure
        message, expected count) tuples, checked in the given order.
        """
        for status, description, expected in expectations:
            found = len(subscription.filesOfStatus(status))
            self.assertEqual(found, expected,
                             "Error: %s sub has wrong number of %s files: %s"
                             % (label, description, found))

    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve",
                             name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileA.create()
    testFileB.create()
    testFileC.create()

    testFileset.addFile([testFileA, testFileB, testFileC])
    bogusFileset.addFile([testFileA, testFileB, testFileC])
    testFileset.commit()
    bogusFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB, testFileC])
    bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    testJobA = Job(name="TestJobA", files=[testFileA])
    testJobB = Job(name="TestJobB", files=[testFileA, testFileB])
    testJobC = Job(name="TestJobC", files=[testFileC])
    bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC])
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    testJobC.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA["outcome"] = "success"
    testJobB["outcome"] = "failure"
    testJobC["outcome"] = "success"
    testJobA.save()
    testJobB.save()
    testJobC.save()

    # Job B also runs over file A and has not succeeded, so completing
    # the input of job A must not complete anything yet.
    testJobA.completeInputFiles()
    checkFileCounts(testSubscription, "test", [("Completed", "complete", 0)])

    testJobB["outcome"] = "success"
    testJobB.save()

    testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]])

    checkFileCounts(testSubscription, "test",
                    [("Available", "available", 0),
                     ("Acquired", "acquired", 1),
                     ("Completed", "complete", 1),
                     ("Failed", "failed", 1)])

    # The bogus subscription must be unaffected
    checkFileCounts(bogusSubscription, "bogus",
                    [("Available", "available", 0),
                     ("Acquired", "acquired", 3),
                     ("Completed", "complete", 0),
                     ("Failed", "failed", 0)])

    return
def testGetLumiWhitelist(self):
    """
    _testGetLumiWhitelist_

    Verify that the ACDC whitelist generation code works correctly.  We'll
    add jobs with the following lumi info:
      # Run 1, lumis [1, 2, 3], [4, 6], [7], [9], [11, 12]
      # Run 2, lumis [5, 6, 7], [10, 11, 12], [15]
      # Run 3, lumis [20]

    And should get out a whitelist that looks like this:
      {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
       "2": [[5, 7], [10, 12], [15, 15]],
       "3": [[20, 20]]}
    """
    dcs = DataCollectionService(url=self.testInit.couchUrl,
                                database="wmcore-acdc-datacollectionsvc")

    # One row per failed job; each row lists (run, lumis) per input file.
    jobLayout = [
        [(1, [1, 2]), (1, [3])],
        [(1, [4, 6])],
        [(1, [7])],
        [(1, [11, 12])],
        [(2, [5, 6, 7])],
        [(2, [10, 11, 12])],
        [(2, [15])],
        [(3, [20])],
        [(1, [9])],
    ]

    failedJobs = []
    for fileSpecs in jobLayout:
        jobFiles = []
        for runNumber, lumis in fileSpecs:
            inputFile = File(lfn=makeUUID(), size=1024, events=1024)
            inputFile.addRun(Run(runNumber, *lumis))
            jobFiles.append(inputFile)

        failedJob = self.getMinimalJob()
        for inputFile in jobFiles:
            failedJob.addFile(inputFile)
        failedJobs.append(failedJob)

    dcs.failedJobs(failedJobs)

    expectedWhitelist = {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
                         "2": [[5, 7], [10, 12], [15, 15]],
                         "3": [[20, 20]]}

    whiteList = dcs.getLumiWhitelist("ACDCTest", "/ACDCTest/reco")

    self.assertEqual(len(whiteList.keys()), 3,
                     "Error: There should be 3 runs.")
    self.assertEqual(whiteList["1"], expectedWhitelist["1"],
                     "Error: Whitelist for run 1 is wrong.")
    self.assertEqual(whiteList["2"], expectedWhitelist["2"],
                     "Error: Whitelist for run 2 is wrong.")
    self.assertEqual(whiteList["3"], expectedWhitelist["3"],
                     "Error: Whitelist for run 3 is wrong.")

    correctLumiList = LumiList(compactList=expectedWhitelist)
    testLumiList = dcs.getLumilistWhitelist("ACDCTest", "/ACDCTest/reco")
    self.assertEqual(correctLumiList.getCMSSWString(),
                     testLumiList.getCMSSWString())

    return
def bulkCommit(self, jobGroups):
    """
    _bulkCommit_

    Commits all objects created during job splitting.  This is dangerous
    because it assumes that you can pass in all jobGroups.

    For the given job groups this creates, in order: one output fileset
    per job group, the job groups themselves, every job in
    jobGroup.newjobs that has no id yet, the job masks and the job/file
    associations. Finally the input files of the new jobs are acquired
    by this subscription.

    :param jobGroups: job groups to commit; each gets `uid` and `id`
        assigned, and its `newjobs` are moved into `jobs`
    """
    jobList = []
    jobGroupList = []

    # You have to do things in this order:
    # 1) First create Filesets, then jobGroups
    # 2) Second, create jobs pointing to jobGroups
    # 3) Deal with masks, etc.

    # First, do we exist?  We better
    # This happens in its own transaction
    if self['id'] == -1:
        self.create()

    existingTransaction = self.beginTransaction()

    # You need to create a number of Filesets equal to the
    # number of jobGroups, each with a random name.
    nameList = [makeUUID() for _ in jobGroups]

    # Create filesets
    action = self.daofactory(classname="Fileset.BulkNewReturn")
    fsIDs = action.execute(nameList=nameList, open=True,
                           conn=self.getDBConn(),
                           transaction=self.existingTransaction())

    for jobGroup in jobGroups:
        jobGroup.uid = makeUUID()
        jobGroupList.append({'subscription': self['id'],
                             'uid': jobGroup.uid,
                             'output': fsIDs.pop()})

    action = self.daofactory(classname="JobGroup.BulkNewReturn")
    jgIDs = action.execute(bulkInput=jobGroupList,
                           conn=self.getDBConn(),
                           transaction=self.existingTransaction())

    # Index the returned IDs by guid once instead of rescanning the whole
    # result for every job group. setdefault keeps the first entry seen
    # for a guid, matching a first-match linear search.
    idByGuid = {}
    for idUID in jgIDs:
        idByGuid.setdefault(idUID['guid'], idUID['id'])

    for jobGroup in jobGroups:
        # A job group without a matching guid keeps its current id
        if jobGroup.uid in idByGuid:
            jobGroup.id = idByGuid[jobGroup.uid]

    for jobGroup in jobGroups:
        for job in jobGroup.newjobs:
            if job["id"] is not None:
                continue

            job["jobgroup"] = jobGroup.id

            if job["name"] is None:
                job["name"] = makeUUID()
            jobList.append(job)

    bulkAction = self.daofactory(classname="Jobs.New")
    result = bulkAction.execute(jobList=jobList,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

    # Move jobs to jobs from newjobs
    for jobGroup in jobGroups:
        jobGroup.jobs.extend(jobGroup.newjobs)
        jobGroup.newjobs = []

    # Use the results of the bulk commit to get the jobIDs
    fileDict = {}
    # Only consumed by the disabled work unit code further down
    jobFileRunLumis = []
    for job in jobList:
        job['id'] = result[job['name']]
        fileDict[job['id']] = []
        for f in job['input_files']:
            fileDict[job['id']].append(f['id'])
            fileMask = job['mask'].filterRunLumisByMask(runs=f['runs'])
            for runObj in fileMask:
                run = runObj.run
                lumis = runObj.lumis
                for lumi in lumis:
                    jobFileRunLumis.append((job['id'], f['id'], run, lumi))

    # Create a list of mask binds
    maskList = []
    for job in jobList:
        mask = job['mask']
        if list(mask['runAndLumis'].keys()):
            # Then we have multiple binds
            maskList.extend(mask.produceCommitBinds(jobID=job['id']))
        else:
            mask['jobID'] = job['id']
            maskList.append(mask)

    maskAction = self.daofactory(classname="Masks.Save")
    maskAction.execute(jobid=None, mask=maskList,
                       conn=self.getDBConn(),
                       transaction=self.existingTransaction())

    fileAction = self.daofactory(classname="Jobs.AddFiles")
    fileAction.execute(jobDict=fileDict,
                       conn=self.getDBConn(),
                       transaction=self.existingTransaction())

    # wfid = self['workflow'].id
    # Add work units and associate them
    # wuAction = self.daofactory(classname='WorkUnit.Add')
    # wufAction = self.daofactory(classname='Jobs.AddWorkUnits')
    # Make a count of how many times each job appears in the list of jobFileRunLumis
    # jobUnitCounts = Counter([jid for jid, _, _, _ in jobFileRunLumis])
    # for jid, fid, run, lumi in jobFileRunLumis:
    #     wuAction.execute(taskid=wfid, fileid=fid, run=run, lumi=lumi, last_unit_count=jobUnitCounts[jid],
    #                      conn=self.getDBConn(), transaction=self.existingTransaction())
    # wufAction.execute(jobFileRunLumis=jobFileRunLumis,
    #                   conn=self.getDBConn(), transaction=self.existingTransaction())

    fileList = []
    for job in jobList:
        fileList.extend(job['input_files'])

    self.acquireFiles(files=fileList)
    self.commitTransaction(existingTransaction)
    return
def testCreatePopulateDrop(self):
    """
    _testCreatePopulateDrop_

    Test creating, populating and dropping a collection.

    Two collections are created and filled with filesets; the first one
    is dropped. Freshly constructed collection objects are then populated
    from couch and their filesets compared with the files added earlier.
    """
    testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="Thunderstruck")
    testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="StruckThunder")
    testCollectionA.create()
    testCollectionB.create()

    # There should be nothing in couch.  Documents are only added for
    # filesets and files.

    testFilesA = []
    for _ in range(5):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFilesA.append(testFile)
    testFilesB = []
    for _ in range(10):
        testFile = File(lfn=makeUUID(), size=random.randint(1024, 4096),
                        events=random.randint(1024, 4096))
        testFilesB.append(testFile)

    testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetA")
    testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetB")
    testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                url=self.testInit.couchUrl,
                                name="TestFilesetC")
    testCollectionA.addFileset(testFilesetA)
    testCollectionB.addFileset(testFilesetB)
    testCollectionB.addFileset(testFilesetC)
    testFilesetA.add(testFilesA)
    testFilesetB.add(testFilesA)
    testFilesetC.add(testFilesA)
    testFilesetC.add(testFilesB)

    # Drop testCollectionA
    testCollectionA.drop()

    # Try to populate testFilesetA
    # NOTE(review): this name is "ThunderStruck" while the dropped
    # collection was created as "Thunderstruck". If names are compared
    # case-sensitively the check below never looks at the dropped
    # collection - confirm which spelling is intended.
    testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="ThunderStruck")
    testCollectionC.populate()

    self.assertEqual(len(testCollectionC["filesets"]), 0,
                     "Error: There should be no filesets in this collect.")

    # Try to populate testFilesetB
    testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                      url=self.testInit.couchUrl,
                                      name="StruckThunder")
    testCollectionD.populate()

    for fileset in testCollectionD["filesets"]:
        # Work on a copy: extending testFilesA itself would leak the B
        # files into the expectation of every fileset checked afterwards
        # and make the result depend on the fileset iteration order.
        testFiles = list(testFilesA)
        if fileset["name"] == "TestFilesetC":
            testFiles.extend(testFilesB)

        self.assertEqual(len(testFiles), len(fileset.files.keys()),
                         "Error: Wrong number of files in fileset.")
        for testFile in testFiles:
            self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                            "Error: File is missing.")
            self.assertEqual(testFile["events"],
                             fileset.files[testFile["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(testFile["size"],
                             fileset.files[testFile["lfn"]]["size"],
                             "Error: Wrong file size.")

    return
def stuffDatabase(self):
    """
    _stuffDatabase_

    Fill the dbsbuffer with some files and blocks.  We'll insert a total
    of 5 files spanning two blocks.  There will be a total of two datasets
    inserted into the database: three files go to self.testDatasetA and
    two to self.testDatasetB.

    All files are given the location "srm-cms.cern.ch", set to status
    "LOCAL" and associated with the BogusRequest/BogusTask workflow.
    """
    myThread = threading.currentThread()

    buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)
    insertWorkflow = buffer3Factory(classname="InsertWorkflow")
    insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

    checksums = {"adler32": "1234", "cksum": "5678"}

    def makeBufferFile(datasetPath):
        """Create and persist one DBSBuffer file in the given dataset."""
        bufferFile = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                                   checksums=checksums,
                                   locations=set(["srm-cms.cern.ch"]))
        bufferFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                                appFam="RECO", psetHash="GIBBERISH",
                                configContent="MOREGIBBERISH")
        bufferFile.setDatasetPath(datasetPath)
        bufferFile.addRun(Run(2, *[45]))
        bufferFile.create()
        return bufferFile

    filesInA = [makeBufferFile(self.testDatasetA) for _ in range(3)]
    self.testFilesA.extend(filesInA)

    filesInB = [makeBufferFile(self.testDatasetB) for _ in range(2)]
    self.testFilesB.extend(filesInB)

    uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    datasetAction = uploadFactory(classname="NewDataset")
    createAction = uploadFactory(classname="CreateBlocks")

    datasetAction.execute(datasetPath=self.testDatasetA)
    datasetAction.execute(datasetPath=self.testDatasetB)

    self.blockAName = self.testDatasetA + "#" + makeUUID()
    self.blockBName = self.testDatasetB + "#" + makeUUID()

    newBlockA = DBSBufferBlock(name=self.blockAName,
                               location="srm-cms.cern.ch",
                               datasetpath=None)
    newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
    newBlockA.status = 'Closed'

    newBlockB = DBSBufferBlock(name=self.blockBName,
                               location="srm-cms.cern.ch",
                               datasetpath=None)
    newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
    newBlockB.status = 'Closed'

    createAction.execute(blocks=[newBlockA, newBlockB])

    bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)

    # Attach every file to the block of its dataset
    setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
    for bufferFile in filesInA:
        setBlock.execute(bufferFile["lfn"], self.blockAName)
    for bufferFile in filesInB:
        setBlock.execute(bufferFile["lfn"], self.blockBName)

    everyFile = filesInA + filesInB

    fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
    for bufferFile in everyFile:
        fileStatus.execute(bufferFile["lfn"], "LOCAL")

    associateWorkflow = buffer3Factory(classname="DBSBufferFiles.AssociateWorkflowToFile")
    for bufferFile in everyFile:
        associateWorkflow.execute(bufferFile["lfn"], "BogusRequest", "BogusTask")

    return
def testVerySimpleTest(self):
    """
    _VerySimpleTest_

    Just test that everything works...more or less

    One job collection of nSubs subscriptions with nFiles files each is
    created, the JobCreatorPoller is run once, and the resulting database
    rows, job cache directories and one pickled job are inspected.
    """

    # return

    myThread = threading.currentThread()

    config = self.getConfig()

    name = makeUUID()
    nSubs = 5
    nFiles = 10
    workloadName = 'TestWorkload'

    dummyWorkload = self.createWorkload(workloadName=workloadName)
    workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

    self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

    testJobCreator = JobCreatorPoller(config=config)

    # First, can we run once without everything crashing?
    testJobCreator.algorithm()

    getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
    result = getJobsAction.execute(state='Created', jobType="Processing")

    self.assertEqual(len(result), nSubs * nFiles)

    # Count database objects
    result = myThread.dbi.processData('SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
    self.assertEqual(len(result), nSubs * nFiles)

    # Find the test directory
    testDirectory = os.path.join(self.testDir, 'jobCacheDir', 'TestWorkload', 'ReReco')
    # It should have at least one jobGroup
    self.assertIn('JobCollection_1_0', os.listdir(testDirectory))
    # But no more then twenty
    self.assertTrue(len(os.listdir(testDirectory)) <= 20)

    groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

    # First job should be in here
    listOfDirs = []
    for tmpDirectory in os.listdir(testDirectory):
        listOfDirs.extend(os.listdir(os.path.join(testDirectory, tmpDirectory)))
    self.assertIn('job_1', listOfDirs)
    self.assertIn('job_2', listOfDirs)
    self.assertIn('job_3', listOfDirs)
    jobDir = os.listdir(groupDirectory)[0]
    jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
    self.assertTrue(os.path.isfile(jobFile))

    # Pickles are binary data: read in 'rb' mode, and let the context
    # manager close the file even if unpickling raises.
    with open(jobFile, 'rb') as pickledJob:
        job = pickle.load(pickledJob)

    self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
    self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed, 1001)

    self.assertEqual(job['workflow'], name)
    self.assertEqual(len(job['input_files']), 1)
    self.assertEqual(os.path.basename(job['sandbox']), 'TestWorkload-Sandbox.tar.bz2')

    return
def execute(self, emulator=None):
    """
    _execute_

    Process the reports of all other steps in this task: collect their
    analysis files, copy conditions ("ALCA") and luminosity ("ALCATXT")
    files out with xrdcp, and record the conditions output in the step
    report before persisting it again.

    :param emulator: when not None, the work is delegated to
        emulator.emulate(self.step, self.job) and its result is returned
    :raises WMExecutionFailure: (code 60319) when an xrdcp copy fails or
        no valid luminosity target URL can be built
    """
    # Are we using emulators again?
    if emulator is not None:
        return emulator.emulate(self.step, self.job)

    logging.info("Steps.Executors.%s.execute called", self.__class__.__name__)

    def runXrdcp(source, target):
        """Copy source to target with xrdcp; return (returncode, output)."""
        # Arguments are passed as a list, without a shell, so file names
        # and URLs are never subject to shell word splitting or expansion.
        proc = subprocess.Popen(["env", "XRD_WRITERECOVERY=0",
                                 "xrdcp", "-s", "-f", source, target],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        copyOutput = proc.communicate()[0]
        return proc.returncode, copyOutput

    # Read/write for user, group and others
    readWriteForAll = (stat.S_IRUSR | stat.S_IWUSR |
                       stat.S_IRGRP | stat.S_IWGRP |
                       stat.S_IROTH | stat.S_IWOTH)

    # Search through steps for analysis files
    for step in self.stepSpace.taskSpace.stepSpaces():
        if step == self.stepName:
            # Don't try to parse your own report; it's not there yet
            continue
        stepLocation = os.path.join(self.stepSpace.taskSpace.location, step)
        logging.info("Beginning report processing for step %s", step)
        reportLocation = os.path.join(stepLocation, 'Report.pkl')
        if not os.path.isfile(reportLocation):
            logging.error("Cannot find report for step %s in space %s", step, stepLocation)
            continue

        # First, get everything from a file and 'unpersist' it
        stepReport = Report()
        stepReport.unpersist(reportLocation, step)

        # Don't upload nor stage out files from bad steps.
        if not stepReport.stepSuccessful(step):
            continue

        # Pulling out the analysis files from each step
        analysisFiles = stepReport.getAnalysisFilesFromStep(step)

        # make sure all conditions from this job get the same uuid
        uuid = makeUUID()

        condFiles2copy = []
        lumiFiles2copy = []

        # Working on analysis files
        for analysisFile in analysisFiles:

            # deal with sqlite files
            if analysisFile.FileClass == "ALCA":

                sqlitefile = analysisFile.fileName.replace('sqlite_file:', '', 1)

                filenamePrefix = "Run%d@%s@%s" % (self.step.condition.runNumber,
                                                  analysisFile.inputtag, uuid)
                filenameDB = filenamePrefix + ".db"
                filenameTXT = filenamePrefix + ".txt"

                shutil.copy2(os.path.join(stepLocation, sqlitefile), filenameDB)

                textoutput = "prepMetaData %s\n" % analysisFile.prepMetaData
                textoutput += "prodMetaData %s\n" % analysisFile.prodMetaData

                with open(filenameTXT, "w") as fout:
                    fout.write(textoutput)

                os.chmod(filenameDB, readWriteForAll)
                os.chmod(filenameTXT, readWriteForAll)

                condFiles2copy.append(filenameDB)
                condFiles2copy.append(filenameTXT)

            # deal with text files containing lumi info
            elif analysisFile.FileClass == "ALCATXT":

                shutil.copy2(os.path.join(stepLocation, analysisFile.fileName),
                             analysisFile.fileName)
                lumiFiles2copy.append(analysisFile.fileName)

        # copy conditions files out and fake the job report
        addedOutputFJR = False
        if self.step.condition.lfnbase:
            logging.info("Copy out conditions files to %s", self.step.condition.lfnbase)
            for file2copy in condFiles2copy:

                logging.info("==> copy %s", file2copy)

                targetLFN = os.path.join(self.step.condition.lfnbase, file2copy)
                targetPFN = "root://eoscms//eos/cms%s" % targetLFN

                returncode, output = runXrdcp(file2copy, targetPFN)
                # Without a shell in between, a process killed by a signal
                # reports a negative return code: any non-zero is a failure.
                if returncode != 0:
                    msg = 'Failure during condition copy to EOS:\n'
                    msg += '   %s\n' % output
                    logging.error(msg)
                    raise WMExecutionFailure(60319, "AlcaHarvestFailure", msg)

                # add fake output file to job report
                addedOutputFJR = True
                stepReport.addOutputFile(self.step.condition.outLabel,
                                         aFile={'lfn': targetLFN,
                                                'pfn': targetPFN,
                                                'module_label': self.step.condition.outLabel})

        # copy luminosity files out
        if self.step.luminosity.url:
            logging.info("Copy out luminosity files to %s", self.step.luminosity.url)
            for file2copy in lumiFiles2copy:

                logging.info("==> copy %s", file2copy)

                targetPFN = rootUrlJoin(self.step.luminosity.url, file2copy)
                if not targetPFN:
                    msg = 'No valid URL for lumi copy:\n'
                    msg += '   %s\n' % self.step.luminosity.url
                    logging.error(msg)
                    raise WMExecutionFailure(60319, "AlcaHarvestFailure", msg)

                returncode, output = runXrdcp(file2copy, targetPFN)
                if returncode != 0:
                    msg = 'Failure during copy to EOS:\n'
                    msg += '   %s\n' % output
                    logging.error(msg)
                    raise WMExecutionFailure(60319, "AlcaHarvestFailure", msg)

        if not addedOutputFJR:
            # no conditions from AlcaHarvest is a valid result, can
            # happen if calibration algorithms produced no output
            # due to not enough statistics or other reasons
            #
            # add fake placeholder output file to job report
            logging.info("==> no sqlite files from AlcaHarvest job, creating placeholder file record")
            stepReport.addOutputFile(self.step.condition.outLabel,
                                     aFile={'lfn': "/no/output",
                                            'pfn': "/no/output",
                                            'module_label': self.step.condition.outLabel})

        # Am DONE with report
        # Persist it
        stepReport.persist(reportLocation)

    return
def testFailJobInput(self):
    """
    _testFailJobInput_

    Test the Jobs.FailInput DAO by calling failInputFiles() on jobs of one
    subscription, and verify that a second subscription running over the
    same files keeps its own file states.
    """
    def assertFileCounts(subscription, available, acquired, failed, completed):
        # Check how many files the subscription reports for each status.
        expectedCounts = (("Available", available),
                          ("Acquired", acquired),
                          ("Failed", failed),
                          ("Completed", completed))
        for status, expected in expectedCounts:
            self.assertEqual(len(subscription.filesOfStatus(status)), expected)

    # Two independent workflow / fileset / subscription chains.
    testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                            name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve",
                             name="wf002", task="Test")
    for workflow in (testWorkflow, bogusWorkflow):
        workflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    for newFileset in (testFileset, bogusFileset):
        newFileset.create()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow)
    for newSubscription in (testSubscription, bogusSubscription):
        newSubscription.create()

    # Three files shared by both filesets; every call below receives its
    # own copy of the list so no list object is shared between callees.
    inputFiles = [File(lfn=makeUUID(), locations="T2_CH_CERN")
                  for _ in range(3)]
    for inputFile in inputFiles:
        inputFile.create()

    for targetFileset in (testFileset, bogusFileset):
        targetFileset.addFile(list(inputFiles))
    for targetFileset in (testFileset, bogusFileset):
        targetFileset.commit()

    testSubscription.completeFiles(list(inputFiles))
    bogusSubscription.acquireFiles(list(inputFiles))

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    for newJobGroup in (testJobGroup, bogusJobGroup):
        newJobGroup.create()

    testJobA = Job(name="TestJobA", files=list(inputFiles))
    testJobB = Job(name="TestJobB", files=list(inputFiles))
    bogusJob = Job(name="BogusJob", files=list(inputFiles))
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA.failInputFiles()
    testJobB.failInputFiles()
    assertFileCounts(testSubscription,
                     available=0, acquired=0, failed=3, completed=0)

    stateChanger = self.daoFactory(classname="Jobs.ChangeState")
    testJobB["state"] = "cleanout"
    stateChanger.execute([testJobB])

    # Fail the input of job A once more now that job B is in cleanout;
    # all three files are expected to be reported as failed.
    testJobA.failInputFiles()
    assertFileCounts(testSubscription,
                     available=0, acquired=0, failed=3, completed=0)

    # The bogus subscription must be unchanged.
    assertFileCounts(bogusSubscription,
                     available=0, acquired=3, failed=0, completed=0)

    return
def createTestJobGroup(self, config, name="TestWorkthrough",
                       filesetName="TestFileset", specLocation="spec.xml",
                       error=False, task="/TestWorkload/ReReco",
                       type="Processing"):
    """
    _createTestJobGroup_

    Creates a group of several jobs.

    The method creates a workflow (and marks it as injected through
    self.inject), an input fileset holding two files, an output fileset
    holding one file, a subscription and a job group with self.nJobs jobs.
    Each job gets a framework job report attached and is then moved
    through the state machine up to 'cleanout'. Finally both input files
    are marked as completed on the subscription.

    :param config: configuration handed to ChangeState
    :param name: name of the workflow
    :param filesetName: name of the input fileset; the output fileset is
        named '<filesetName>Output'
    :param specLocation: value used as the workflow spec
    :param error: when true, the first report is loaded from
        badBackfillJobReport.pkl instead of mergeReport1.pkl
    :param task: task of the workflow
    :param type: type of the subscription
    :return: the job group that was created
    """
    testWorkflow = Workflow(spec=specLocation, owner=self.OWNERDN,
                            name=name, task=task,
                            owner_vogroup="", owner_vorole="")
    testWorkflow.create()
    self.inject.execute(names=[name], injected=True)

    testWMBSFileset = Fileset(name=filesetName)
    testWMBSFileset.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.setLocation('malpaquet')

    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(10, *[12314]))
    testFileB.setLocation('malpaquet')

    testFileA.create()
    testFileB.create()

    testWMBSFileset.addFile(testFileA)
    testWMBSFileset.addFile(testFileB)
    testWMBSFileset.commit()
    testWMBSFileset.markOpen(0)

    outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
    outputWMBSFileset.create()

    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
    testFileC.addRun(Run(10, *[12312]))
    testFileC.setLocation('malpaquet')
    testFileC.create()

    outputWMBSFileset.addFile(testFileC)
    outputWMBSFileset.commit()
    outputWMBSFileset.markOpen(0)

    testWorkflow.addOutput('output', outputWMBSFileset)

    testSubscription = Subscription(fileset=testWMBSFileset,
                                    workflow=testWorkflow,
                                    type=type)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    for _ in range(self.nJobs):
        testJob = Job(name=makeUUID())
        testJob.addFile(testFileA)
        testJob.addFile(testFileB)
        testJob['retry_count'] = 1
        testJob['retry_max'] = 10
        testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
        testJobGroup.add(testJob)

    testJobGroup.commit()

    changer = ChangeState(config)

    report1 = Report()
    report2 = Report()
    # Only the first report depends on the error flag; the second one is
    # the same file in both cases.
    if error:
        path1 = os.path.join(WMCore.WMBase.getTestBase(),
                             "WMComponent_t/JobAccountant_t/fwjrs",
                             "badBackfillJobReport.pkl")
    else:
        path1 = os.path.join(WMCore.WMBase.getTestBase(),
                             'WMComponent_t/TaskArchiver_t/fwjrs',
                             'mergeReport1.pkl')
    path2 = os.path.join(WMCore.WMBase.getTestBase(),
                         'WMComponent_t/TaskArchiver_t/fwjrs',
                         'logCollectReport2.pkl')
    report1.load(filename=path1)
    report2.load(filename=path2)

    # First pass through the state machine, up to 'complete'.
    for newState, oldState in (('created', 'new'),
                               ('executing', 'created'),
                               ('complete', 'executing')):
        changer.propagate(testJobGroup.jobs, newState, oldState)

    # Jobs with an index below self.nJobs / 2 get the first report, the
    # remaining ones the second.
    for i in range(self.nJobs):
        testJobGroup.jobs[i]['fwjr'] = report1 if i < self.nJobs / 2 else report2

    # Fail, cool off, run once more, fail again and walk on to 'cleanout'.
    for newState, oldState in (('jobfailed', 'complete'),
                               ('jobcooloff', 'jobfailed'),
                               ('created', 'jobcooloff'),
                               ('executing', 'created'),
                               ('complete', 'executing'),
                               ('jobfailed', 'complete'),
                               ('retrydone', 'jobfailed'),
                               ('exhausted', 'retrydone'),
                               ('cleanout', 'exhausted')):
        changer.propagate(testJobGroup.jobs, newState, oldState)

    testSubscription.completeFiles([testFileA, testFileB])

    return testJobGroup
def createJobsLocationWise(self, fileset, endOfRun, dqmHarvestUnit, lumiMask, goodRunList):
    """
    _createJobsLocationWise_

    Sort the files of the fileset by location set and run number and
    create the harvesting jobs for every location set.

    :param fileset: fileset whose files are loaded (loadData with
        parentage=0) and grouped
    :param endOfRun: when true the harvest type is "EndOfRun", otherwise
        "Periodic"; the flag is also forwarded to the job creation method
    :param dqmHarvestUnit: "byRun" selects createJobByRun, any other value
        selects createMultiRunJob
    :param lumiMask: only consulted when goodRunList is empty; when set,
        only runs and lumis accepted by isGoodRun / isGoodLumi are kept
    :param goodRunList: when non-empty, only runs whose number is in this
        container are kept
    """
    fileset.loadData(parentage=0)
    allFiles = fileset.getFiles()

    # sort by location and run
    locationDict = {}   # {frozenset of locations: {run number: [files]}}
    runDict = {}        # {lfn: runs retained for that file}
    for fileInfo in allFiles:
        locSet = frozenset(fileInfo['locations'])
        runSet = fileInfo.getRuns()

        if len(locSet) == 0:
            logging.error("File %s has no locations!", fileInfo['lfn'])
        if len(runSet) == 0:
            logging.error("File %s has no run information!", fileInfo['lfn'])

        # Populate a dictionary with [location][run] so we can split jobs
        # according to those different combinations
        filesByRun = locationDict.setdefault(locSet, {})

        fileInfo['runs'] = set()
        # Handle jobs with run whitelist/blacklist
        if goodRunList:
            goodRuns = runDict[fileInfo['lfn']] = set()
            for run in runSet:
                if run.run in goodRunList:
                    goodRuns.add(run)
                    filesByRun.setdefault(run.run, []).append(fileInfo)
        elif lumiMask:
            # it has lumiMask, thus we consider only good run/lumis
            newRunSet = []
            for run in runSet:
                if not isGoodRun(lumiMask, run.run):
                    continue
                # then keep only the lumis accepted by the mask
                maskedLumis = [lumi for lumi in run.lumis
                               if isGoodLumi(lumiMask, run.run, lumi)]
                if not maskedLumis:
                    continue
                newRunSet.append(Run(run.run, *maskedLumis))
                filesByRun.setdefault(run.run, []).append(fileInfo)
            # files without any good run/lumi get no entry in runDict
            if newRunSet:
                runDict[fileInfo['lfn']] = newRunSet
        else:
            # no LumiList and no run white or black list
            runDict[fileInfo['lfn']] = runSet
            for run in runSet:
                filesByRun.setdefault(run.run, []).append(fileInfo)

    # create separate jobs for different locations
    # NOTE(review): newGroup() is called twice here, which looks
    # redundant - confirm against newGroup() before removing either call.
    self.newGroup()
    self.jobCount = 0
    baseName = makeUUID()
    self.newGroup()

    harvestType = "EndOfRun" if endOfRun else "Periodic"
    if dqmHarvestUnit == "byRun":
        createJobs = self.createJobByRun
    else:
        createJobs = self.createMultiRunJob

    for location in locationDict:
        createJobs(locationDict, location, baseName, harvestType, runDict, endOfRun)

    return