def testCreateDeleteExists(self):
    """
    _testCreateDeleteExists_

    Exercise the create(), delete() and exists() methods of the File
    class: exists() is checked before creation, after creation and
    after deletion.
    """
    fileUnderTest = File(lfn="/this/is/a/lfn", size=1024, events=10,
                         checksums={'cksum': 1111})

    assert not fileUnderTest.exists(), \
        "ERROR: File exists before it was created"

    fileUnderTest.addRun(Run(1, *[45]))
    fileUnderTest.create()

    assert fileUnderTest.exists() > 0, \
        "ERROR: File does not exist after it was created"

    fileUnderTest.delete()

    assert not fileUnderTest.exists(), \
        "ERROR: File exists after it has been deleted"
    return
def testSetLocation(self):
    """
    _testSetLocation_

    Create a file, attach two locations to it (plus two more that are
    never committed) and reload it from the database to verify that only
    the saved locations were stored.
    """
    fileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                 checksums={'cksum': 1})
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    # These are never saved, so they must not show up after a reload.
    fileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                      immediateSave=False)

    fileB = File(id=fileA["id"])
    fileB.loadData()

    expected = ["se1.fnal.gov", "se1.cern.ch"]
    for location in fileB["locations"]:
        assert location in expected, \
            "ERROR: Unknown file location"
        expected.remove(location)

    assert not expected, \
        "ERROR: Some locations are missing"
    return
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Create a file inside an open transaction, then roll the transaction
    back.  exists() must report False before creation, a positive id
    after creation, and False again once the rollback completes.
    """
    myThread = threading.currentThread()
    myThread.transaction.begin()

    fileUnderTest = File(lfn="/this/is/a/lfn", size=1024, events=10,
                         checksums={'cksum': 1111})

    assert not fileUnderTest.exists(), \
        "ERROR: File exists before it was created"

    fileUnderTest.addRun(Run(1, *[45]))
    fileUnderTest.create()

    assert fileUnderTest.exists() > 0, \
        "ERROR: File does not exist after it was created"

    myThread.transaction.rollback()

    assert not fileUnderTest.exists(), \
        "ERROR: File exists after transaction was rolled back."
    return
def testAddToFileset(self):
    """
    _AddToFileset_

    Test to see if we can add to a fileset using the DAO
    """
    testFileset = Fileset(name="inputFileset")
    testFileset.create()

    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileB.addRun(Run(1, *[45]))
    fileB.create()

    addToFileset = self.daofactory(classname="Files.AddToFileset")
    lfns = [fileA['lfn'], fileB['lfn']]
    addToFileset.execute(file=lfns, fileset=testFileset.id)

    reloaded = Fileset(name="inputFileset")
    reloaded.loadData()

    self.assertEqual(len(reloaded.files), 2)
    for wmbsFile in reloaded.files:
        self.assertTrue(wmbsFile in [fileA, fileB])

    # Check that adding twice doesn't crash
    addToFileset.execute(file=lfns, fileset=testFileset.id)
def sortedFilesFromMergeUnits(mergeUnits):
    """
    _sortedFilesFromMergeUnits_

    Given a list of merge units, sort them and the files that they
    contain.  Return a flat list of sorted WMBS File structures built
    from the merge-unit file records.
    """
    mergeUnits.sort(mergeUnitCompare)

    sortedFiles = []
    for mergeUnit in mergeUnits:
        mergeUnit["files"].sort(fileCompare)
        # "fileInfo" rather than "file" so the builtin is not shadowed.
        for fileInfo in mergeUnit["files"]:
            newFile = File(id=fileInfo["file_id"], lfn=fileInfo["file_lfn"],
                           events=fileInfo["file_events"])
            # The WMBS data structure puts locations that are passed in through
            # the constructor in the "newlocations" attribute.  We want these to
            # be in the "locations" attribute so that they get picked up by the
            # job submitter.
            newFile["locations"] = set([fileInfo["pnn"]])
            # Bug fix: addRun() used to be called twice with the same
            # run/lumi, registering the lumi on the file twice.
            newFile.addRun(Run(fileInfo["file_run"], fileInfo["file_lumi"]))
            sortedFiles.append(newFile)

    return sortedFiles
def createMergeJob(self, mergeableFiles):
    """
    _createMergeJob_

    Create a merge job for the given merge units.  All the files
    contained in the merge units will be associated to the job.
    """
    # "is None" instead of "== None": identity comparison is the correct
    # idiom for None checks.
    if self.currentGroup is None:
        self.newGroup()

    self.newJob(name=self.getJobName())
    mergeableFiles.sort(fileCompare)

    # "mergeableFile" rather than "file" so the builtin is not shadowed.
    for mergeableFile in mergeableFiles:
        newFile = File(id=mergeableFile["file_id"],
                       lfn=mergeableFile["file_lfn"],
                       events=mergeableFile["file_events"])
        # The WMBS data structure puts locations that are passed in through
        # the constructor in the "newlocations" attribute.  We want these to
        # be in the "locations" attribute so that they get picked up by the
        # job submitter.
        newFile["locations"] = set([mergeableFile["se_name"]])
        newFile.addRun(Run(mergeableFile["file_run"],
                           mergeableFile["file_lumi"]))
        self.currentJob.addFile(newFile)
        # Disk estimate is the file size expressed in KB.
        self.currentJob.addResourceEstimates(
            disk=float(mergeableFile["file_size"]) / 1024)
def test06(self):
    """
    _test06_

    Test repacking of 3 lumis
    2 small lumis (single job), followed by a big one (multiple jobs)

    files for lumi 1 and 2 are below multi-lumi thresholds
    files for lumi 3 are above single-lumi threshold
    """
    mySplitArgs = self.splitArgs.copy()

    insertClosedLumiBinds = []
    for lumi in [1, 2, 3]:
        filecount = 2
        for i in range(filecount):
            # Lumi 3 gets big files: 2 x 500 events exceeds the
            # 900-event cap set below, so it has to be split up.
            if lumi == 3:
                nevents = 500
            else:
                nevents = 100
            newFile = File(makeUUID(), size = 1000, events = nevents)
            newFile.addRun(Run(1, *[lumi]))
            newFile.setLocation("SomePNN", immediateSave = False)
            newFile.create()
            self.fileset1.addFile(newFile)
            # Bind marking this run/lumi as closed in the database.
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "A",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                     transaction = False)

    # Very high latency so splitting is driven by the event threshold only.
    mySplitArgs['maxLatency'] = 50000
    mySplitArgs['maxInputEvents'] = 900
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 3,
                     "ERROR: JobFactory didn't create three jobs")

    # The four small lumi-1/2 files fit into one job; each lumi-3 file
    # ends up in its own job.
    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")

    self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                     "ERROR: second job does not process 1 file")

    self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                     "ERROR: third job does not process 1 file")

    return
def injectFilesFromDBS(inputFileset, datasetPath):
    """
    _injectFilesFromDBS_

    Look up the files of datasetPath in DBS and insert a small subset of
    them into the given WMBS fileset, which is closed afterwards.
    """
    # print() in function form so this also parses under Python 3.
    print("injecting files from %s into %s, please wait..." % (datasetPath,
                                                               inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_0_9"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath,
                                  retriveList = ["retrive_block", "retrive_lumi",
                                                 "retrive_run"])

    # NOTE : this is to limit the number of jobs to create ... simply using
    # the first two files returned for the needed dataset
    dbsResults = dbsResults[0:2]
    print(" found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        myFile = File(lfn = dbsResult["LogicalFileName"],
                      size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"],
                      checksums = {"cksum": dbsResult["Checksum"]},
                      locations = set(['srm.ciemat.es',
                                       'storm-se-01.ba.infn.it',
                                       'storage01.lcg.cscs.ch']))
        # Assumes every lumi of the file belongs to the run of its first
        # lumi -- TODO confirm for multi-run files.
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
def testSetLocation(self):
    """
    _testSetLocation_

    Add two saved and two unsaved locations to a new file, then load the
    file back from the database and check that exactly the saved
    locations come back.
    """
    createdFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                       checksums = {'cksum': 1})
    createdFile.addRun(Run(1, *[45]))
    createdFile.create()
    createdFile.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    # Not committed -- these locations must not survive the reload.
    createdFile.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                            immediateSave = False)

    loadedFile = File(id = createdFile["id"])
    loadedFile.loadData()

    remaining = ["se1.fnal.gov", "se1.cern.ch"]
    for loc in loadedFile["locations"]:
        assert loc in remaining, \
            "ERROR: Unknown file location"
        remaining.remove(loc)

    assert not remaining, \
        "ERROR: Some locations are missing"
    return
def test03(self):
    """
    _test03_

    Test single lumi event threshold
    Single lumi input
    """
    mySplitArgs = self.splitArgs.copy()

    insertClosedLumiBinds = []
    # Eight 100-event files, all in the same run/lumi.
    for lumi in [1]:
        filecount = 8
        for i in range(filecount):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(1, *[lumi]))
            newFile.setLocation("SomePNN", immediateSave = False)
            newFile.create()
            self.fileset1.addFile(newFile)
            # Bind marking this run/lumi as closed in the database.
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "A",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                     transaction = False)

    # With only a (huge) latency and no event cap, nothing is split yet.
    mySplitArgs['maxLatency'] = 50000
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # 8 x 100 events against a 650-event cap -> a 6-file and a 2-file job.
    mySplitArgs['maxInputEvents'] = 650
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 6,
                     "ERROR: Job does not process 6 files")

    job = jobGroups[0].jobs[1]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    # Splitting one lumi across jobs must be recorded as an active split lumi.
    self.assertEqual(self.getNumActiveSplitLumis(), 1,
                     "ERROR: Split lumis were not created")

    return
def testAddToFileset(self):
    """
    _AddToFileset_

    Exercise the Files.AddToFileset DAO: add two files to a fileset,
    reload the fileset and verify the membership; adding the same files
    a second time must not raise.
    """
    inputFileset = Fileset(name = "inputFileset")
    inputFileset.create()

    lfnFiles = []
    for suffix in ("A", "B"):
        newFile = File(lfn = "/this/is/a/lfn%s" % suffix, size = 1024,
                       events = 10)
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        lfnFiles.append(newFile)

    addToFileset = self.daofactory(classname = "Files.AddToFileset")
    addToFileset.execute(file = [f['lfn'] for f in lfnFiles],
                         fileset = inputFileset.id)

    reloaded = Fileset(name = "inputFileset")
    reloaded.loadData()

    self.assertEqual(len(reloaded.files), 2)
    for member in reloaded.files:
        self.assertTrue(member in lfnFiles)

    # Check that adding twice doesn't crash
    addToFileset.execute(file = [f['lfn'] for f in lfnFiles],
                         fileset = inputFileset.id)
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=None):
    """
    _injectFilesFromDBS_

    Query DBS for the files of datasetPath and inject them into the given
    WMBS fileset, which is committed and closed afterwards.

    :param inputFileset: WMBS Fileset to fill.
    :param datasetPath: dataset path string understood by DBS.
    :param runsWhiteList: optional list of run numbers (as strings); when
        given, only files whose first run is listed are injected.
    :raises Exception: when no files end up selected.
    """
    # None default instead of a shared mutable [] default argument.
    runsWhiteList = runsWhiteList or []

    print("injecting files from %s into %s, please wait..." % (datasetPath,
                                                               inputFileset.name))
    args = {}
    args["url"] = "https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFileArray(path=datasetPath,
                                      retriveList=["retrive_lumi", "retrive_run"])
    print(" found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        # Filter on the run number of the file's first lumi.
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn=dbsResult["LogicalFileName"],
                      size=dbsResult["FileSize"],
                      events=dbsResult["NumberOfEvents"],
                      checksums={"cksum": dbsResult["Checksum"]},
                      locations="cmssrm.fnal.gov",
                      merged=True)
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.appendLumi(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
def test03(self):
    """
    _test03_

    Test input size threshold on multi lumis
    """
    mySplitArgs = self.splitArgs.copy()

    for lumi in (1, 2):
        for _ in range(2):
            mergeFile = File(makeUUID(), size=1000, events=100)
            mergeFile.addRun(Run(1, *[lumi]))
            mergeFile.setLocation("SomeSE", immediateSave=False)
            mergeFile.create()
            self.fileset2.addFile(mergeFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    # A tiny size threshold alone must not produce any job groups.
    mySplitArgs["maxInputSize"] = 1
    self.assertEqual(len(jobFactory(**mySplitArgs)), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # After the latency threshold kicks in, one group with two jobs appears.
    time.sleep(1)
    mySplitArgs["maxLatency"] = 1
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    return
def _addACDCFileToWMBSFile(self, acdcFile, inFileset = True):
    """
    Convert an ACDC file record into a WMBS File, register it with the
    DBS buffer, and queue it for bulk creation.

    :param acdcFile: dict-like ACDC file record.
    :param inFileset: when truthy, flag the file as a fileset member.
    :returns: the new WMBS File object.
    """
    # pass empty check sum since it won't be updated to dbs anyway
    checksums = {}
    wmbsFile = File(lfn = str(acdcFile["lfn"]),
                    size = acdcFile["size"],
                    events = acdcFile["events"],
                    checksums = checksums,
                    # TODO: need to get list of parent lfn
                    parents = acdcFile["parents"],
                    locations = acdcFile["locations"],
                    merged = acdcFile.get('merged', True))
    ## TODO need to get the lumi lists
    for run in acdcFile['runs']:
        wmbsFile.addRun(run)

    dbsFile = self._convertACDCFileToDBSFile(acdcFile)
    self._addToDBSBuffer(dbsFile, checksums, acdcFile["locations"])

    # Lazy %-style arguments: the message is only rendered when the
    # INFO level is actually enabled.
    logging.info("WMBS File: %s\n on Location: %s",
                 wmbsFile['lfn'], wmbsFile['locations'])

    # Normalize to a plain boolean (original set True/False explicitly).
    wmbsFile['inFileset'] = bool(inFileset)
    self.wmbsFilesToCreate.append(wmbsFile)

    return wmbsFile
def generateFakeMCFile(self, numEvents = 100, firstEvent = 1, lastEvent = 100,
                       firstLumi = 1, lastLumi = 10, index = 1):
    """
    Build a single-file MC fileset plus an EventBased Production
    subscription over it, and return that subscription.
    """
    # MC comes with only one MCFakeFile
    mcFileset = Fileset(name = "MCTestFileset %i" % index)
    mcFileset.create()

    fakeFile = File("MCFakeFileTest %i" % index, size = 1000,
                    events = numEvents,
                    locations = set(["somese.cern.ch"]))
    fakeFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    fakeFile["first_event"] = firstEvent
    fakeFile["last_event"] = lastEvent
    fakeFile.create()
    mcFileset.addFile(fakeFile)
    mcFileset.commit()

    mcWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                          name = "wf001", task = "Test")
    mcWorkflow.create()
    mcSubscription = Subscription(fileset = mcFileset,
                                  workflow = mcWorkflow,
                                  split_algo = "EventBased",
                                  type = "Production")
    mcSubscription.create()
    return mcSubscription
def populateACDCCouch(self, numFiles = 2, lumisPerJob = 35, eventsPerJob = 20000): """ _populateACDCCouch_ Create production files in couchDB to test the creation of ACDC jobs for the EventBased algorithm """ # Define some constants workflowName = "ACDC_TestEventBased" filesetName = "/%s/Production" % workflowName owner = "*****@*****.**" group = "unknown" lumisPerFile = lumisPerJob * 250 for i in range(numFiles): for j in range(250): lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5) acdcFile = File(lfn = lfn, size = 100, events = eventsPerJob, locations = self.validLocations, merged = False, first_event = 1) run = Run(1, *range(1 + (i * lumisPerFile) + j * lumisPerJob, (j + 1) * lumisPerJob + (i * lumisPerFile) + 2)) acdcFile.addRun(run) acdcDoc = {"collection_name" : workflowName, "collection_type" : "ACDC.CollectionTypes.DataCollection", "files" : {lfn : acdcFile}, "fileset_name" : filesetName, "owner" : {"user": owner, "group" : group}} self.couchDB.queue(acdcDoc) self.couchDB.commit() return
def test05(self):
    """
    _test05_

    Test multi lumis express merges with holes
    """
    mySplitArgs = self.splitArgs.copy()

    # Lumi 3 is skipped on purpose to leave a hole in the sequence.
    for lumi in (1, 2, 4):
        for _ in range(2):
            mergeFile = File(makeUUID(), size=1000, events=100)
            mergeFile.addRun(Run(1, *[lumi]))
            mergeFile.setLocation("SomePNN", immediateSave=False)
            mergeFile.create()
            self.fileset2.addFile(mergeFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    # Let the latency threshold expire so the factory produces jobs.
    time.sleep(1)
    mySplitArgs['maxLatency'] = 1
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    return
def injectFilesFromDBS(inputFileset, datasetPath, runsWhiteList=None):
    """
    _injectFilesFromDBS_

    Query DBS for the files of datasetPath and inject them into the given
    WMBS fileset, which is committed and closed afterwards.

    :param runsWhiteList: optional list of run numbers (as strings); when
        given, only files whose first run is listed are injected.
    :raises Exception: when no files end up selected.
    """
    # None default instead of a shared mutable [] default argument.
    runsWhiteList = runsWhiteList or []

    # print() and raise Exception(...) forms so this also parses under
    # Python 3 (the old "print x" / "raise E, msg" syntax does not).
    print("injecting files from %s into %s, please wait..." % (datasetPath,
                                                               inputFileset.name))
    args = {}
    args["url"] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    args["version"] = "DBS_2_1_1"
    args["mode"] = "GET"
    dbsApi = DbsApi(args)
    dbsResults = dbsApi.listFiles(path = datasetPath,
                                  retriveList = ["retrive_lumi", "retrive_run"])
    print(" found %d files, inserting into wmbs..." % (len(dbsResults)))

    for dbsResult in dbsResults:
        # Filter on the run number of the file's first lumi.
        if runsWhiteList and str(dbsResult["LumiList"][0]["RunNumber"]) not in runsWhiteList:
            continue
        myFile = File(lfn = dbsResult["LogicalFileName"],
                      size = dbsResult["FileSize"],
                      events = dbsResult["NumberOfEvents"],
                      checksums = {"cksum": dbsResult["Checksum"]},
                      locations = "cmssrm.fnal.gov",
                      merged = True)
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return
def createCommonFileset():
    """
    Build a fileset holding four files, all located at T2_CH_CERN, each
    carrying lumis from two runs.
    """
    commonFileset = Fileset(name="TestFileset")

    # (lfn, size, events, [(run, lumis), ...]) for each file.
    fileSpecs = [
        ("/some/file/test1", 1000, 100,
         [(1, [1, 3, 4, 5, 6, 7]), (2, [1, 2, 4, 5, 6, 7])]),
        ("/some/file/test2", 2000, 200,
         [(3, [2, 8]), (4, [3, 8])]),
        ("/some/file/test3", 3000, 300,
         [(5, [10, 11, 12]), (6, [10, 11, 12])]),
        ("/some/file/test4", 4000, 400,
         [(2, [3, 8, 9]), (3, [3, 4, 5, 6])]),
    ]
    for lfn, size, events, runSpecs in fileSpecs:
        newFile = File(lfn, size=size, events=events)
        for runNumber, lumis in runSpecs:
            newFile.addRun(Run(runNumber, *lumis))
        newFile.setLocation('T2_CH_CERN')
        commonFileset.addFile(newFile)

    commonFileset.create()
    return commonFileset
def test05(self):
    """
    _test05_

    Test repacking of multiple lumis with holes in the lumi sequence
    Multi lumi input
    """
    mySplitArgs = self.splitArgs.copy()

    insertClosedLumiBinds = []
    # Lumi 3 is deliberately missing to create a hole in the sequence.
    for lumi in [1, 2, 4]:
        filecount = 2
        for i in range(filecount):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(1, *[lumi]))
            newFile.setLocation("SomeSE", immediateSave = False)
            newFile.create()
            self.fileset1.addFile(newFile)
            insertClosedLumiBinds.append( { 'RUN' : 1,
                                            'LUMI' : lumi,
                                            'STREAM' : "A",
                                            'FILECOUNT' : filecount,
                                            'INSERT_TIME' : self.currentTime,
                                            'CLOSE_TIME' : self.currentTime } )
    self.fileset1.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription1)

    self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                     transaction = False)

    mySplitArgs['maxInputFiles'] = 5
    jobGroups = jobFactory(**mySplitArgs)

    # Lumi 3 has not been closed yet, so no jobs may be created.
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # Close lumi 3 with zero files, filling the hole in the sequence.
    self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                               'LUMI' : 3,
                                               'STREAM' : "A",
                                               'FILECOUNT' : 0,
                                               'INSERT_TIME' : self.currentTime,
                                               'CLOSE_TIME' : self.currentTime },
                                     transaction = False)

    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")

    self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                     "ERROR: first job does not process 4 files")

    return
def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Create and commit a file, then delete it inside a transaction and
    roll the transaction back: the file must be gone right after the
    delete, and present again after the rollback.
    """
    fileUnderTest = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                         checksums={'cksum': 1111})

    assert not fileUnderTest.exists(), \
        "ERROR: File exists before it was created"

    fileUnderTest.addRun(Run(1, *[45]))
    fileUnderTest.create()

    assert fileUnderTest.exists() > 0, \
        "ERROR: File does not exist after it was created"

    myThread = threading.currentThread()
    myThread.transaction.begin()

    fileUnderTest.delete()

    assert not fileUnderTest.exists(), \
        "ERROR: File exists after it has been deleted"

    myThread.transaction.rollback()

    assert fileUnderTest.exists() > 0, \
        "ERROR: File does not exist after transaction was rolled back."
    return
def populateACDCCouch(self, numFiles=3, lumisPerJob=4, eventsPerJob=100, numberOfJobs=250): """ _populateACDCCouch_ Create production files in couchDB to test the creation of ACDC jobs for the EventBased algorithm. """ # Define some constants workflowName = "ACDC_TestEventBased" filesetName = "/%s/Production" % workflowName lumisPerFile = lumisPerJob * numberOfJobs for i in range(numFiles): lfn = "MCFakeFile-some-hash-%s" % str(i).zfill(5) for j in range(numberOfJobs): firstEvent = j * eventsPerJob + 1 acdcFile = File(lfn=lfn, size=1024, events=eventsPerJob, locations=self.validLocations, merged=False, first_event=firstEvent) run = Run(1, *range(1 + j * lumisPerJob + (i * lumisPerFile), 1 + (j + 1) * lumisPerJob + (i * lumisPerFile))) acdcFile.addRun(run) acdcDoc = {"collection_name": workflowName, "collection_type": "ACDC.CollectionTypes.DataCollection", "files": {lfn: acdcFile}, "fileset_name": filesetName} self.couchDB.queue(acdcDoc) self.couchDB.commit() return
def test05(self):
    """
    _test05_

    Test multi lumis express merges with holes
    """
    mySplitArgs = self.splitArgs.copy()

    # Populate lumis 1, 2 and 4 only -- lumi 3 is the hole.
    for lumiNumber in (1, 2, 4):
        for _ in range(2):
            inputFile = File(makeUUID(), size = 1000, events = 100)
            inputFile.addRun(Run(1, *[lumiNumber]))
            inputFile.setLocation("SomePNN", immediateSave = False)
            inputFile.create()
            self.fileset2.addFile(inputFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                      subscription = self.subscription2)

    # Wait so the one-second latency threshold below has expired.
    time.sleep(1)
    mySplitArgs['maxLatency'] = 1
    jobGroups = jobFactory(**mySplitArgs)

    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")

    self.assertEqual(len(jobGroups[0].jobs), 2,
                     "ERROR: JobFactory didn't create two jobs")

    return
def testCreateWithLocation(self):
    """
    _testCreateWithLocation_

    Create a file with two locations passed to the constructor, reload
    it from the database and verify that both locations were recorded.
    """
    createdFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                       checksums = {'cksum': 1},
                       locations = set(["T1_US_FNAL_Disk", "T2_CH_CERN"]))
    createdFile.addRun(Run(1, *[45]))
    createdFile.create()

    loadedFile = File(id = createdFile["id"])
    loadedFile.loadData()

    remaining = ["T1_US_FNAL_Disk", "T2_CH_CERN"]
    for location in loadedFile["locations"]:
        assert location in remaining, \
            "ERROR: Unknown file location"
        remaining.remove(location)

    assert not remaining, \
        "ERROR: Some locations are missing"
    return
def testSetLocationByLFN(self):
    """
    _testSetLocationByLFN_

    Create two files, set a location on both via the SetLocationByLFN
    DAO, then reload them and check the location was stored.
    """
    lfnA = "/this/is/a/lfnA"
    lfnB = "/this/is/a/lfnB"

    fileA = File(lfn = lfnA, size = 1024, events = 10,
                 checksums = {'cksum': 1})
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileB = File(lfn = lfnB, size = 1024, events = 10,
                 checksums = {'cksum': 1})
    fileB.addRun(Run(1, *[45]))
    fileB.create()

    setLocation = self.daofactory(classname = "Files.SetLocationByLFN")
    setLocation.execute(lfn = [{'lfn': lfnA, 'location': 'se1.fnal.gov'},
                               {'lfn': lfnB, 'location': 'se1.fnal.gov'}])

    loadedA = File(id = fileA["id"])
    loadedA.loadData()
    loadedB = File(id = fileB["id"])
    loadedB.loadData()

    self.assertEqual(loadedA['locations'], set(['se1.fnal.gov']))
    self.assertEqual(loadedB['locations'], set(['se1.fnal.gov']))

    return
def testCreateTransaction(self):
    """
    _testCreateTransaction_

    Begin a transaction, create a file, then roll the transaction back.
    exists() must be False before creation, a positive id after
    creation, and False again after the rollback.
    """
    currentThread = threading.currentThread()
    currentThread.transaction.begin()

    txFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                  checksums={'cksum': 1111})

    assert not txFile.exists(), \
        "ERROR: File exists before it was created"

    txFile.addRun(Run(1, *[45]))
    txFile.create()

    assert txFile.exists() > 0, \
        "ERROR: File does not exist after it was created"

    currentThread.transaction.rollback()

    assert not txFile.exists(), \
        "ERROR: File exists after transaction was rolled back."
    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a LumiBased Processing subscription for testing.

    :param nFiles: number of child files per site.
    :param lumisPerFile: lumis attached to each file.
    :param twoSites: when True, create a second batch of files at CERN.
    :param rand: when True, pick random lumi numbers instead of sequential.
    :returns: the created Subscription.
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % baseName, size=1000, events=100,
                      locations=set(["T1_US_FNAL_Disk"]))
    parentFile.create()

    def addFiles(lfnTemplate, location):
        # Create nFiles files at `location`, each with lumisPerFile lumis,
        # parented to parentFile and added to the fileset.  Extracted so the
        # twoSites branch no longer duplicates this whole loop.
        for i in range(nFiles):
            newFile = File(lfn=lfnTemplate % (baseName, i), size=1000,
                           events=100, locations=location)
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)

    addFiles('%s_%i', "T1_US_FNAL_Disk")
    if twoSites:
        addFiles('%s_%i_2', "T2_CH_CERN")
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def createTestJob(subscriptionType="Merge"):
    """
    _createTestJob_

    Build the full workflow/fileset/subscription/jobgroup chain and
    return a job holding two freshly created input files.
    """
    workflow = Workflow(spec=makeUUID(), owner="Simon",
                        name=makeUUID(), task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow,
                                type=subscriptionType)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    inputFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    inputFileA.addRun(Run(1, *[45]))
    inputFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    inputFileB.addRun(Run(1, *[46]))
    inputFileA.create()
    inputFileB.create()

    testJob = Job(name=makeUUID(), files=[inputFileA, inputFileB])
    testJob["couch_record"] = "somecouchrecord"
    testJob["location"] = "test.site.ch"
    testJob.create(group=jobGroup)
    testJob.associateFiles()
    return testJob
def testD_NonContinuousLumis(self):
    """
    _NonContinuousLumis_

    Test and see if LumiBased can work when the lumis are non continuous
    """
    baseName = makeUUID()
    nFiles = 10

    testFileset = Fileset(name = baseName)
    testFileset.create()
    for i in range(nFiles):
        newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                       events = 100, locations = "somese.cern.ch")
        # Set to two non-continuous lumi numbers
        lumis = [100 + i, 200 + i]
        newFile.addRun(Run(i, *lumis))
        newFile.create()
        testFileset.addFile(newFile)
    testFileset.commit()

    testSubscription = Subscription(fileset = testFileset,
                                    workflow = self.testWorkflow,
                                    split_algo = "LumiBased",
                                    type = "Processing")
    testSubscription.create()

    splitter = SplitterFactory()
    jobFactory = splitter(package = "WMCore.WMBS",
                          subscription = testSubscription)

    jobGroups = jobFactory(lumis_per_job = 2,
                           halt_job_on_file_boundaries = False,
                           splitOnRun = False,
                           performance = self.performanceParams)

    # One group, one job per file.
    self.assertEqual(len(jobGroups), 1)
    jobs = jobGroups[0].jobs
    self.assertEqual(len(jobs), 10)
    for j in jobs:
        runs = j['mask'].getRunAndLumis()
        for r in runs.keys():
            self.assertEqual(len(runs[r]), 2)
            for l in runs[r]:
                # Each run should have two lumis
                # Each lumi should be of form [x, x]
                # meaning that the first and last lumis are the same
                self.assertEqual(len(l), 2)
                self.assertEqual(l[0], l[1])
        # Resource estimates derived from the performance parameters.
        self.assertEqual(j['estimatedJobTime'], 100 * 12)
        self.assertEqual(j['estimatedDiskUsage'], 100 * 400)
        self.assertEqual(j['estimatedMemoryUsage'], 2300)

    return
def testAddDupsToFilesetBulk(self):
    """
    _AddToDupsFilesetBulk_

    Same as testAddDupsToFileset() but faster
    """
    testWorkflowA = Workflow(spec = 'hello', owner = "mnorman",
                             name = "wf001",
                             task="basicWorkload/Production")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec = 'hello', owner = "mnorman",
                             name = "wf001",
                             task="basicWorkload/Production2")
    testWorkflowB.create()

    testFilesetA = Fileset(name = "inputFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name = "inputFilesetB")
    testFilesetB.create()

    testSubscriptionA = Subscription(workflow = testWorkflowA,
                                     fileset = testFilesetA)
    testSubscriptionA.create()
    testSubscriptionB = Subscription(workflow = testWorkflowB,
                                     fileset = testFilesetB)
    testSubscriptionB.create()

    # The files are not create()d individually here; the bulk helper is
    # expected to insert them.
    testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                     locations = ['SiteA'])
    testFileA.addRun(Run( 1, *[45]))
    testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10,
                     locations = ['SiteB'])
    testFileB.addRun(Run( 1, *[45]))

    addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                         [testFileA, testFileB],
                         conn = testFileA.getDBConn(),
                         transaction = testFileA.existingTransaction())

    testFileset2 = Fileset(name = "inputFilesetA")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 2)
    for file in testFileset2.files:
        self.assertTrue(file in [testFileA, testFileB])

    # Check that adding twice doesn't crash
    addFilesToWMBSInBulk(testFilesetA.id, "wf001",
                         [testFileA, testFileB],
                         conn = testFileA.getDBConn(),
                         transaction = testFileA.existingTransaction())

    # Files should not get added to fileset B because fileset A is associated
    # with wf001.
    addFilesToWMBSInBulk(testFilesetB.id, "wf001",
                         [testFileA, testFileB],
                         conn = testFileA.getDBConn(),
                         transaction = testFileA.existingTransaction())

    testFileset2 = Fileset(name = "inputFilesetB")
    testFileset2.loadData()

    self.assertEqual(len(testFileset2.files), 0)
    return
def notestTwoJobGroups(self):
    """
    Test two job groups with a shared fileset. (Minimal part of
    testGetLocations which was failing)

    NOTE(review): the "notest" prefix keeps the unittest runner from
    collecting this method -- presumably disabled on purpose; confirm
    before re-enabling.
    """
    testWorkflow1 = Workflow(spec="spec.xml", owner="Simon", name="wf001",
                             task="Test1")
    testWorkflow1.create()

    testWMBSFileset1 = WMBSFileset(name="TestFileset1")
    testWMBSFileset1.create()

    testSubscription1 = Subscription(fileset=testWMBSFileset1,
                                     workflow=testWorkflow1)
    testSubscription1.create()

    testJobGroup1 = JobGroup(subscription=testSubscription1)
    testJobGroup1.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(10, *[12312]))
    testFileA.create()

    testJobA = Job(name="TestJobA")
    testJobA.addFile(testFileA)
    testJobGroup1.add(testJobA)
    testJobGroup1.commit()

    testWorkflow2 = Workflow(spec="spec.xml", owner="Simon", name="wf002",
                             task="Test2")
    testWorkflow2.create()

    # The second fileset deliberately reuses the name "TestFileset1" so
    # both job groups share the same underlying fileset.
    testWMBSFileset2 = WMBSFileset(name="TestFileset1")
    testWMBSFileset2.create()

    testSubscription2 = Subscription(fileset=testWMBSFileset2,
                                     workflow=testWorkflow2)
    testSubscription2.create()

    testJobGroup2 = JobGroup(subscription=testSubscription2)
    testJobGroup2.create()

    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
    testFileC.addRun(Run(10, *[12312]))
    testFileC.create()

    testJobA1 = Job(name="TestJobA1")
    testJobA1.addFile(testFileC)
    testJobGroup2.add(testJobA1)
    testJobGroup2.commit()
def testAddDupsToFileset(self):
    """
    _AddToDupsFileset_

    Verify that the dups version of the AddToFileset DAO will not add files
    to a fileset if they're already associated to another fileset with the
    same workflow.
    """
    workflowA = Workflow(spec='hello', owner="mnorman",
                         name="wf001", task="basicWorkload/Production")
    workflowA.create()
    workflowB = Workflow(spec='hello', owner="mnorman",
                         name="wf001", task="basicWorkload/Production2")
    workflowB.create()

    filesetA = Fileset(name="inputFilesetA")
    filesetA.create()
    filesetB = Fileset(name="inputFilesetB")
    filesetB.create()

    Subscription(workflow=workflowA, fileset=filesetA).create()
    Subscription(workflow=workflowB, fileset=filesetB).create()

    fileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    fileA.addRun(Run(1, *[45]))
    fileA.create()
    fileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    fileB.addRun(Run(1, *[45]))
    fileB.create()

    addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
    addToFileset.execute(file=[fileA['lfn'], fileB['lfn']],
                         fileset=filesetA.id, workflow="wf001")

    loaded = Fileset(name="inputFilesetA")
    loaded.loadData()
    self.assertEqual(len(loaded.files), 2)
    for wmbsFile in loaded.files:
        self.assertTrue(wmbsFile in [fileA, fileB])

    # Check that adding twice doesn't crash
    addToFileset.execute(file=[fileA['lfn'], fileB['lfn']],
                         fileset=filesetA.id, workflow="wf001")

    # Files should not get added to fileset B because fileset A is associated
    # with wf001.
    addToFileset.execute(file=[fileA['lfn'], fileB['lfn']],
                         fileset=filesetB.id, workflow="wf001")

    loaded = Fileset(name="inputFilesetB")
    loaded.loadData()
    self.assertEqual(len(loaded.files), 0)
    return
def test06(self):
    """
    _test06_

    Test max input files threshold for multi lumi

    3 same size lumis
    """
    mySplitArgs = self.splitArgs.copy()

    # Two 1000-byte files in each of lumis 1-3.
    for lumi in (1, 2, 3):
        for _ in range(2):
            fileObj = File(makeUUID(), size=1000, events=100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave=False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    mySplitArgs['minInputSize'] = 3000
    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    mySplitArgs['maxInputFiles'] = 5
    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")

    # Closing the fileset flushes the leftover files into one more job.
    self.fileset2.markOpen(False)
    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")
    return
def setUp(self):
    """
    _setUp_

    Stand up a WMBS schema, resource-control sites and a Harvest
    subscription over a fileset of 11 files at T1_US_FNAL_Disk.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"])

    self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

    self.myThread = threading.currentThread()
    self.WMBSFactory = DAOFactory(package="WMCore.WMBS",
                                  logger=logging,
                                  dbinterface=self.myThread.dbi)

    config = self.getConfig()
    self.changer = ChangeState(config)

    resourceControl = ResourceControl()
    resourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
    resourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
    resourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

    self.fileset1 = Fileset(name="TestFileset1")
    for fileNum in range(11):
        testFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
        testFile.addRun(Run(1, *[1]))
        testFile.setLocation('T1_US_FNAL_Disk')
        self.fileset1.addFile(testFile)
    self.fileset1.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Harvest",
                                      type="Harvesting")
    self.subscription1.create()

    self.configFile = EmulatorSetup.setupWMAgentConfig()
    return
def test00(self):
    """
    _test00_

    Test that the job name prefix feature works
    Test max edm size threshold for single lumi

    small lumi, followed by over-large lumi
    expect 1 job for small lumi and 4 jobs for over-large
    """
    mySplitArgs = self.splitArgs.copy()

    # Lumi 1: two 1000-byte files; lumi 2: four 4000-byte files.
    for lumi in (1, 2):
        for _ in range(2 * lumi):
            fileObj = File(makeUUID(), size=1000 * lumi * lumi, events=100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave=False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    mySplitArgs['maxEdmSize'] = 13000
    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 3,
                     "ERROR: JobFactory didn't create three jobs")

    job = jobGroups[0].jobs[0]
    self.assertTrue(job['name'].startswith("RepackMerge-"),
                    "ERROR: Job has wrong name")
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    job = jobGroups[0].jobs[1]
    self.assertEqual(len(job.getFiles()), 3,
                     "ERROR: Job does not process 3 files")

    job = jobGroups[0].jobs[2]
    self.assertEqual(len(job.getFiles()), 1,
                     "ERROR: Job does not process 1 file")
    return
def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
    """
    There are two assumptions made to make this method behave properly,
    1. DBS returns only one level of ParentList.
       If DBS returns multiple level of parentage, it will be still get handled.
       However that might not be what we wanted. In that case, restrict to one level.
    2. Assumes parents files are in the same location as child files.
       This is not True in general case, but workquue should only select work only
       where child and parent files are in the same location
    """
    # TODO get dbsFile with lumi event information
    # Recurse one level into the parentage; parents are never flagged as
    # members of the fileset.
    dbsFile.setdefault("ParentList", [])
    wmbsParents = [self._addDBSFileToWMBSFile(parent, storageElements,
                                              inFileset=False)
                   for parent in dbsFile["ParentList"]]

    checksums = {}
    if dbsFile.get('Checksum'):
        checksums['cksum'] = dbsFile['Checksum']
    if dbsFile.get('Adler32'):
        checksums['adler32'] = dbsFile['Adler32']

    wmbsFile = File(lfn=dbsFile["LogicalFileName"],
                    size=dbsFile["FileSize"],
                    events=dbsFile["NumberOfEvents"],
                    checksums=checksums,
                    # TODO: need to get list of parent lfn
                    parents=wmbsParents,
                    locations=set(storageElements))

    for lumi in dbsFile['LumiList']:
        sections = lumi['LumiSectionNumber']
        if isinstance(sections, list):
            # Pair each lumi section with its event count when available.
            if 'EventCount' in lumi:
                sections = list(zip(sections, lumi['EventCount']))
        elif 'EventCount' in lumi:
            sections = (sections, lumi['EventCount'])
        wmbsFile.addRun(Run(lumi['RunNumber'], sections))

    self._addToDBSBuffer(dbsFile, checksums, storageElements)

    logging.debug("WMBS File: %s on Location: %s",
                  wmbsFile['lfn'], wmbsFile['newlocations'])

    wmbsFile['inFileset'] = bool(inFileset)
    self.wmbsFilesToCreate.add(wmbsFile)
    return wmbsFile
def test10(self):
    """
    _test10_

    Test merging of multiple lumis with holes in the lumi sequence
    Hole is due to no streamer files for the lumi
    Multi lumi input

    It only works with a single hole, as it creates a merged file even with
    it being of a smaller size than the mininputsize.
    It was changed due to the maxinputevents not being used anymore
    """
    mySplitArgs = self.splitArgs.copy()

    # Lumi 3 is intentionally absent: two files each in lumis 1, 2 and 4.
    for lumi in (1, 2, 4):
        for _ in range(2):
            fileObj = File(makeUUID(), size=1000, events=100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave=False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    mySplitArgs['minInputSize'] = 100000
    mySplitArgs['maxInputSize'] = 200000

    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # Close lumi 3 with zero files so the splitter can tell the hole is
    # real rather than late data.
    self.insertClosedLumiDAO.execute(binds={'RUN': 1,
                                            'LUMI': 3,
                                            'STREAM': "A",
                                            'FILECOUNT': 0,
                                            'INSERT_TIME': self.currentTime,
                                            'CLOSE_TIME': self.currentTime},
                                     transaction=False)

    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 4,
                     "ERROR: Job does not process 4 files")
    return
def test09(self):
    """
    _test09_

    Test under merge (over merge event threshold)

    one small lumi, one large lumi (small below min size,
    large below max size, but both together above max size)
    """
    mySplitArgs = self.splitArgs.copy()

    # Lumi 1: two 1000-byte files; lumi 2: two 4000-byte files.
    for lumi in (1, 2):
        for _ in range(2):
            fileObj = File(makeUUID(), size=1000 * lumi * lumi, events=100)
            fileObj.addRun(Run(1, *[lumi]))
            fileObj.setLocation("SomePNN", immediateSave=False)
            fileObj.create()
            self.fileset2.addFile(fileObj)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    mySplitArgs['minInputSize'] = 3000
    mySplitArgs['maxInputSize'] = 9000
    mySplitArgs['maxInputEvents'] = 300

    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")

    # Closing the fileset merges the remaining pair as well.
    self.fileset2.markOpen(False)

    jobGroups = jobFactory(**mySplitArgs)
    self.assertEqual(len(jobGroups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(jobGroups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    job = jobGroups[0].jobs[0]
    self.assertEqual(len(job.getFiles()), 2,
                     "ERROR: Job does not process 2 files")
    return
def addMCFakeFile(self):
    """
    Add a fake file for wmbs to run production over

    Validates that the mask carries a complete first/last
    run/lumi/event range, builds a single synthetic File covering that
    range at the common locations, queues it, and bulk-inserts all
    queued files into WMBS, closing the top-level fileset.

    :raises WorkQueueWMBSException: when a required mask key is missing
        or no injection location is available.
    :returns: number of files inserted by the bulk add.
    """
    # The mask must define a complete first/last run-lumi-event range.
    needed = [
        'FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi',
        'LastRun'
    ]
    for key in needed:
        if self.mask and self.mask.get(key) is None:
            msg = 'Invalid value "%s" for %s' % (self.mask.get(key), key)
            raise WorkQueueWMBSException(msg)
    # Only inject at PNNs that appear in the common location list.
    locations = set()
    siteInfo = self.getLocationInfo.execute(
        conn=self.getDBConn(), transaction=self.existingTransaction())
    for site in siteInfo:
        if site['pnn'] in self.commonLocation:
            locations.add(site['pnn'])
    if not locations:
        msg = 'No locations to inject Monte Carlo work to, unable to proceed'
        raise WorkQueueWMBSException(msg)
    # NOTE(review): encode/ignore strips non-ascii characters; on
    # Python 3 this produces bytes, not str — confirm File() accepts it.
    mcFakeFileName = ("MCFakeFile-%s" % self.topLevelFileset.name).encode(
        'ascii', 'ignore')
    wmbsFile = File(
        lfn=mcFakeFileName,
        first_event=self.mask['FirstEvent'],
        last_event=self.mask['LastEvent'],
        events=self.mask['LastEvent'] - self.mask['FirstEvent'] +
        1,  # inclusive range
        locations=locations,
        merged=False,  # merged causes dbs parentage relation
    )
    if self.mask:
        lumis = list(
            range(self.mask['FirstLumi'],
                  self.mask['LastLumi'] + 1))  # inclusive range
        wmbsFile.addRun(Run(self.mask['FirstRun'],
                            *lumis))  # assume run number static
    else:
        wmbsFile.addRun(Run(1, 1))
    wmbsFile['inFileset'] = True  # file is not a parent
    logging.debug("WMBS MC Fake File: %s on Location: %s", wmbsFile['lfn'],
                  wmbsFile['newlocations'])
    self.wmbsFilesToCreate.add(wmbsFile)
    # Flush every queued file into WMBS in a single bulk insert, then
    # close the top-level fileset so no more files can be added.
    totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(
        self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)
    self.topLevelFileset.markOpen(False)
    return totalFiles
def _addACDCFileToWMBSFile(self, acdcFile, inFileset=True):
    """
    adds the ACDC files into WMBS database

    :param acdcFile: ACDC file record (dict-like) with lfn, size, events,
        locations, runs and parents.
    :param inFileset: True flags the file as a fileset member; the
        recursive parent insertions pass False.
    :returns: the created WMBS File object.
    """
    wmbsParents = []
    # TODO: this check can be removed when ErrorHandler filters parents file for unmerged data
    if acdcFile["parents"]:
        # Inspect one parent to decide whether parentage is usable.
        firstParent = next(iter(acdcFile["parents"]))
        # If files is merged and has unmerged parents skip the wmbs population
        if acdcFile.get("merged", 0) and ("/store/unmerged/" in firstParent
                                          or "MCFakeFile" in firstParent):
            # don't set the parents
            pass
        else:
            # set the parentage for all the unmerged parents
            for parent in acdcFile["parents"]:
                logging.debug("WMBS ACDC Parent File: %s", parent)
                # Parents are registered recursively, marked merged and
                # co-located with the child.
                parent = self._addACDCFileToWMBSFile(DatastructFile(
                    lfn=parent,
                    locations=acdcFile["locations"],
                    merged=True),
                                                     inFileset=False)
                wmbsParents.append(parent)

    # pass empty check sum since it won't be updated to dbs anyway
    checksums = {}
    wmbsFile = File(lfn=str(acdcFile["lfn"]),
                    size=acdcFile["size"],
                    events=acdcFile["events"],
                    first_event=acdcFile.get('first_event', 0),
                    last_event=acdcFile.get('last_event', 0),
                    checksums=checksums,
                    parents=wmbsParents,
                    locations=acdcFile["locations"],
                    merged=acdcFile.get('merged', True))

    ## TODO need to get the lumi lists
    for run in acdcFile['runs']:
        wmbsFile.addRun(run)

    if not acdcFile["lfn"].startswith("/store/unmerged") or wmbsParents:
        # only add to DBSBuffer if is not unmerged file or it has parents.
        dbsFile = self._convertACDCFileToDBSFile(acdcFile)
        self._addToDBSBuffer(dbsFile, checksums, acdcFile["locations"])

    logging.debug("WMBS ACDC File: %s on Location: %s", wmbsFile['lfn'],
                  wmbsFile['newlocations'])

    wmbsFile['inFileset'] = bool(inFileset)

    self.wmbsFilesToCreate.add(wmbsFile)
    return wmbsFile
def test10(self):
    """
    _test10_

    Test merging of multiple lumis with holes in the lumi sequence
    Hole is due to no streamer files for the lumi
    Multi lumi input

    It only works with a single hole, as it creates a merged file even with
    it being of a smaller size than the mininputsize.
    It was changed due to the maxinputevents not being used anymore
    """
    splitArgs = self.splitArgs.copy()

    # Populate lumis 1, 2 and 4 (lumi 3 is the hole) with two files each.
    for lumiNumber in [1, 2, 4]:
        for _unused in range(2):
            streamerFile = File(makeUUID(), size=1000, events=100)
            streamerFile.addRun(Run(1, *[lumiNumber]))
            streamerFile.setLocation("SomePNN", immediateSave=False)
            streamerFile.create()
            self.fileset2.addFile(streamerFile)
    self.fileset2.commit()

    jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                      subscription=self.subscription2)

    splitArgs['minInputSize'] = 100000
    splitArgs['maxInputSize'] = 200000

    groups = jobFactory(**splitArgs)
    self.assertEqual(len(groups), 0,
                     "ERROR: JobFactory should have returned no JobGroup")

    # Register lumi 3 as closed with a file count of zero: a real hole.
    closedLumi = {'RUN': 1,
                  'LUMI': 3,
                  'STREAM': "A",
                  'FILECOUNT': 0,
                  'INSERT_TIME': self.currentTime,
                  'CLOSE_TIME': self.currentTime}
    self.insertClosedLumiDAO.execute(binds=closedLumi, transaction=False)

    groups = jobFactory(**splitArgs)
    self.assertEqual(len(groups), 1,
                     "ERROR: JobFactory didn't return one JobGroup")
    self.assertEqual(len(groups[0].jobs), 1,
                     "ERROR: JobFactory didn't create one job")
    mergeJob = groups[0].jobs[0]
    self.assertEqual(len(mergeJob.getFiles()), 4,
                     "ERROR: Job does not process 4 files")
    return
def createLargerTestJobGroup(self, commitFlag=True):
    """
    _createTestJobGroup_

    Build a workflow/fileset/subscription chain, two files at two
    locations, and a job group holding 102 jobs over those files.
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = WMBSFileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    fileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
    fileC.addRun(Run(10, *[12312]))
    fileC.setLocation("goodse.cern.ch")
    fileC.setLocation("malpaquet")

    fileD = File(lfn="/this/is/a/lfnD", size=1024, events=10)
    fileD.addRun(Run(10, *[12312]))
    fileD.setLocation("goodse.cern.ch")
    fileD.setLocation("malpaquet")

    fileC.create()
    fileD.create()

    jobA = Job(name="TestJobA1")
    jobA.addFile(fileC)
    jobB = Job(name="TestJobB1")
    jobB.addFile(fileD)

    jobGroup.add(jobA)
    jobGroup.add(jobB)

    # Pad the group with 100 additional jobs over the same file.
    for index in range(100):
        extraJob = Job(name="TestJob%i" % index)
        extraJob.addFile(fileC)
        jobGroup.add(extraJob)

    if commitFlag:
        jobGroup.commit()

    return jobGroup
def createFile(self, lfn, events, run, lumis, location):
    """
    _createFile_

    Create a file for testing
    """
    testFile = File(lfn=lfn, size=1000, events=events)
    # Offset lumi numbers by run * lumis so runs never share a lumi.
    lumiNumbers = [(run * lumis) + offset for offset in range(lumis)]
    testFile.addRun(Run(run, *lumiNumbers))
    testFile.setLocation(location)
    return testFile
def testLocationMerging(self):
    """
    _testLocationMerging_

    Verify that files residing on different SEs are not merged together in
    the same job.
    """
    self.stuffWMBS()

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="s2", seName="somese3.cern.ch")

    fileSite2 = File(lfn="fileSite2", size=4098, events=1024,
                     first_event=0, locations=set(["somese3.cern.ch"]))
    fileSite2.addRun(Run(1, *[46]))
    fileSite2.create()
    fileSite2.addParent(self.parentFileSite2["lfn"])

    self.mergeFileset.addFile(fileSite2)
    self.mergeFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    result = jobFactory(min_merge_size=4097,
                        max_merge_size=99999999,
                        max_merge_events=999999999)

    assert len(result) == 1, \
        "ERROR: More than one JobGroup returned."
    assert len(result[0].jobs) == 2, \
        "ERROR: Two jobs should have been returned."

    # Every file inside a job must share one location set.
    for job in result[0].jobs:
        firstInputFile = job.getFiles()[0]
        baseLocation = list(firstInputFile["locations"])[0]
        for inputFile in job.getFiles():
            assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                inputFile["locations"] == set(["somese3.cern.ch"]), \
                "Error: Wrong number of locations"
            assert list(inputFile["locations"])[0] == baseLocation, \
                "Error: Wrong location."
    return
def createSubscription(self, nFiles, lumisPerFile, twoSites=False, rand=False):
    """
    _createSubscription_

    Create a subscription for testing

    :param nFiles: number of files to create per site
    :param lumisPerFile: number of lumi sections per file
    :param twoSites: when True also create a second, parallel set of
        files located at otherse.cern.ch (lfns suffixed with "_2")
    :param rand: when True draw lumi numbers randomly from
        [1000*i, 1000*(i+1)] instead of sequentially
    :returns: a created LumiBased Processing Subscription
    """
    baseName = makeUUID()

    testFileset = Fileset(name=baseName)
    testFileset.create()
    parentFile = File('%s_parent' % (baseName), size=1000, events=100,
                      locations=set(["somese.cern.ch"]))
    parentFile.create()

    def _addFiles(lfnSuffix, location):
        # One file per index i, carrying lumisPerFile lumis in run i;
        # factored out of the former duplicated per-site loops.
        for i in range(nFiles):
            newFile = File(lfn='%s_%i%s' % (baseName, i, lfnSuffix),
                           size=1000, events=100, locations=location)
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)

    _addFiles('', "somese.cern.ch")
    if twoSites:
        _addFiles('_2', "otherse.cern.ch")
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="LumiBased",
                                    type="Processing")
    testSubscription.create()

    return testSubscription
def createTestJobs(self, nJobs, cacheDir):
    """
    _createTestJobs_

    Create several jobs
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                type="Processing",
                                split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # One shared input file located at 'malpaquet'.
    inputFile = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    inputFile.addRun(Run(10, *[12312]))
    inputFile.setLocation('malpaquet')
    inputFile.create()

    baseName = makeUUID()

    # Create nJobs jobs, all reading the same input file.
    for jobIndex in range(nJobs):
        job = Job(name='%s-%i' % (baseName, jobIndex))
        job.addFile(inputFile)
        job['location'] = 'malpaquet'
        job['retry_count'] = 1
        job['retry_max'] = 10
        job.create(jobGroup)
        job.save()
        jobGroup.add(job)

    jobGroup.commit()

    # Point every job's cache at the requested directory.
    for job in jobGroup.jobs:
        job.setCache(cacheDir)

    return jobGroup
def testLocationMerging(self):
    """
    _testLocationMerging_

    Verify that files residing on different SEs are not merged together in
    the same job.
    """
    self.stuffWMBS()

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

    ralFile = File(lfn="fileRAL", size=4098, events=1024,
                   first_event=0, locations=set(["T1_UK_RAL_Disk"]))
    ralFile.addRun(Run(1, *[46]))
    ralFile.create()

    self.mergeFileset.addFile(ralFile)
    self.mergeFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=self.mergeSubscription)

    result = jobFactory(min_merge_size=4097,
                        max_merge_size=99999999,
                        max_merge_events=999999999,
                        merge_across_runs=False)

    assert len(result) == 1, \
        "ERROR: More than one JobGroup returned."
    assert len(result[0].jobs) == 3, \
        "ERROR: Three jobs should have been returned."

    # Count jobs by their possible processing site.
    ralJobs = 0
    fnalJobs = 0
    for job in result[0].jobs:
        if job["possiblePSN"] == set(["T1_UK_RAL"]):
            ralJobs += 1
        elif job["possiblePSN"] == set(["T1_US_FNAL"]):
            fnalJobs += 1

    self.assertEqual(ralJobs, 1)
    self.assertEqual(fnalJobs, 2)
    return
def _addDBSFileToWMBSFile(self, dbsFile, storageElements, inFileset=True):
    """
    Convert a DBS file record into a WMBS File and queue it for creation.

    There are two assumptions made to make this method behave properly,
    1. DBS returns only one level of ParentList.
       If DBS returns multiple level of parentage, it will be still get handled.
       However that might not be what we wanted. In that case, restrict to one level.
    2. Assumes parents files are in the same location as child files.
       This is not True in general case, but workquue should only select work only
       where child and parent files are in the same location

    :param dbsFile: dict-like DBS file record
    :param storageElements: locations for the file (and its parents)
    :param inFileset: True when the file belongs to the fileset; parent
        files are recursed with False
    :returns: the constructed WMBS File
    """
    wmbsParents = []
    dbsFile.setdefault("ParentList", [])
    for parent in dbsFile["ParentList"]:
        wmbsParents.append(self._addDBSFileToWMBSFile(parent, storageElements,
                                                      inFileset=False))

    checksums = {}
    if dbsFile.get('Checksum'):
        checksums['cksum'] = dbsFile['Checksum']
    if dbsFile.get('Adler32'):
        checksums['adler32'] = dbsFile['Adler32']

    wmbsFile = File(lfn=dbsFile["LogicalFileName"],
                    size=dbsFile["FileSize"],
                    events=dbsFile["NumberOfEvents"],
                    checksums=checksums,
                    #TODO: need to get list of parent lfn
                    parents=wmbsParents,
                    locations=set(storageElements))

    for lumi in dbsFile['LumiList']:
        # isinstance instead of type() == list: also handles list subclasses.
        if isinstance(lumi['LumiSectionNumber'], list):
            run = Run(lumi['RunNumber'], *lumi['LumiSectionNumber'])
        else:
            run = Run(lumi['RunNumber'], lumi['LumiSectionNumber'])
        wmbsFile.addRun(run)

    self._addToDBSBuffer(dbsFile, checksums, storageElements)

    # Lazy %-args: the message is only formatted when INFO is enabled.
    logging.info("WMBS File: %s\n on Location: %s",
                 wmbsFile['lfn'], wmbsFile['newlocations'])

    wmbsFile['inFileset'] = bool(inFileset)

    self.wmbsFilesToCreate.append(wmbsFile)
    return wmbsFile