def loadData(self):
    """
    _loadData_

    Load all information about the job, including the mask and all input
    files.  Either the ID or the name must be specified before this is
    called.
    """
    existingTransaction = self.beginTransaction()

    self.load()
    self.getMask()

    fileAction = self.daofactory(classname="Jobs.LoadFiles")
    files = fileAction.execute(self["id"], conn=self.getDBConn(),
                               transaction=self.existingTransaction())

    self["input_files"] = []
    for fileObj in files:
        newFile = File(id=fileObj["id"])
        newFile.loadData(parentage=0)
        self.addFile(newFile)

    self.commitTransaction(existingTransaction)
    return

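# A minimal usage sketch (not from the original source; the job id is
# hypothetical and a populated WMBS database is assumed).  Per the docstring
# above, either the ID or the name must be set before loadData() is called:
#
#     job = Job(id=1234)
#     job.loadData()
#     inputLFNs = [f["lfn"] for f in job["input_files"]]
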
def testSetLocationByLFN(self):
    """
    _testSetLocationByLFN_

    Create a file and add a couple locations.  Load the file from the
    database to make sure that the locations were set correctly.
    """
    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    parentAction = self.daofactory(classname="Files.SetLocationByLFN")
    binds = [{'lfn': "/this/is/a/lfnA", 'location': 'se1.fnal.gov'},
             {'lfn': "/this/is/a/lfnB", 'location': 'se1.fnal.gov'}]
    parentAction.execute(lfn=binds)

    testFileC = File(id=testFileA["id"])
    testFileC.loadData()
    testFileD = File(id=testFileB["id"])
    testFileD.loadData()

    self.assertEqual(testFileC['locations'], set(['se1.fnal.gov']))
    self.assertEqual(testFileD['locations'], set(['se1.fnal.gov']))
    return

def testSetLocation(self):
    """
    _testSetLocation_

    Create a file and add a couple locations.  Load the file from the
    database to make sure that the locations were set correctly.
    """
    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
    testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                          immediateSave=False)

    testFileB = File(id=testFileA["id"])
    testFileB.loadData()

    goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

    for location in testFileB["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"
    return

def testCreateWithLocation(self):
    """
    _testCreateWithLocation_

    Create a file and add a couple locations.  Load the file from the
    database to make sure that the locations were set correctly.
    """
    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1},
                     locations=set(["T1_US_FNAL_Disk", "T2_CH_CERN"]))
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileB = File(id=testFileA["id"])
    testFileB.loadData()

    goldenLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

    for location in testFileB["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"
    return

def loadData(self, parentage=1):
    """
    _loadData_

    Load all the files that belong to this fileset.
    """
    existingTransaction = self.beginTransaction()

    if self.name is None or self.id < 0:
        self.load()

    action = self.daofactory(classname="Files.InFileset")
    results = action.execute(fileset=self.id, conn=self.getDBConn(),
                             transaction=self.existingTransaction())

    self.files = set()
    self.newfiles = set()

    for result in results:
        thisFile = File(id=result["fileid"])
        thisFile.loadData(parentage=parentage)
        self.files.add(thisFile)

    self.commitTransaction(existingTransaction)
    return

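# A minimal usage sketch (assumptions: the fileset name is hypothetical and
# the fileset already exists in WMBS).  Passing parentage=0 skips loading
# each file's parents, which keeps the per-file loadData() calls cheaper:
#
#     fileset = Fileset(name="TestFileset")
#     fileset.loadData(parentage=0)
#     lfns = [f["lfn"] for f in fileset.files]
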
def testAddChild(self):
    """
    _testAddChild_

    Add a child to some parent files and make sure that all the parentage
    information is loaded/stored correctly from the database.
    """
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={"cksum": 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={"cksum": 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={"cksum": 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileParentA.addChild("/this/is/a/lfn")
    testFileParentB.addChild("/this/is/a/lfn")

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        assert parentFile in goldenFiles, "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, "ERROR: Some parents are missing"
    return

def loadData(self):
    """
    _loadData_

    Load all the files that belong to this fileset.
    """
    existingTransaction = self.beginTransaction()

    if self.name is None or self.id < 0:
        self.load()

    action = self.daofactory(classname="Files.InFileset")
    results = action.execute(fileset=self.id, conn=self.getDBConn(),
                             transaction=self.existingTransaction())

    self.files = set()
    self.newfiles = set()

    for result in results:
        fileObj = File(id=result["fileid"])
        fileObj.loadData(parentage=1)
        self.files.add(fileObj)

    self.commitTransaction(existingTransaction)
    return

def testBulkParentage(self):
    """
    _testBulkParentage_

    Verify that the bulk parentage dao correctly sets file parentage.
    """
    testFileChildA = File(lfn="/this/is/a/child/lfnA", size=1024,
                          events=20, checksums={'cksum': 1})
    testFileChildB = File(lfn="/this/is/a/child/lfnB", size=1024,
                          events=20, checksums={'cksum': 1})
    testFileChildA.create()
    testFileChildB.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileB.create()
    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileC.create()

    parentage = [{"child": testFileChildA["id"], "parent": testFileA["id"]},
                 {"child": testFileChildA["id"], "parent": testFileB["id"]},
                 {"child": testFileChildA["id"], "parent": testFileC["id"]},
                 {"child": testFileChildB["id"], "parent": testFileA["id"]},
                 {"child": testFileChildB["id"], "parent": testFileB["id"]}]

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    bulkParentageAction = daofactory(classname="Files.AddBulkParentage")
    bulkParentageAction.execute(parentage)

    testFileD = File(id=testFileChildA["id"])
    testFileD.loadData(parentage=1)
    testFileE = File(id=testFileChildB["id"])
    testFileE.loadData(parentage=1)

    goldenFiles = [testFileA, testFileB, testFileC]
    for parentFile in testFileD["parents"]:
        assert parentFile in goldenFiles, \
            "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Some parents are missing"

    goldenFiles = [testFileA, testFileB]
    for parentFile in testFileE["parents"]:
        assert parentFile in goldenFiles, \
            "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Some parents are missing"
    return

def testAddChildTransaction(self):
    """
    _testAddChildTransaction_

    Add a child to some parent files and make sure that all the parentage
    information is loaded/stored correctly from the database.  Roll back
    the addition of one of the children and then verify that it does in
    fact only have one parent.
    """
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileParentA.addChild("/this/is/a/lfn")

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testFileParentB.addChild("/this/is/a/lfn")

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        assert parentFile in goldenFiles, \
            "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Some parents are missing"

    myThread.transaction.rollback()
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA]
    for parentFile in testFileB["parents"]:
        assert parentFile in goldenFiles, \
            "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Some parents are missing"
    return

def testParentageByJob(self):
    """
    _testParentageByJob_

    Tests the DAO that assigns parentage by Job
    """
    testWorkflow = Workflow(spec='hello', owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.addFile(testFileParentA)
    testJobA.addFile(testFileParentB)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.SetParentageByJob")
    parentAction.execute(binds={'jobid': testJobA.exists(),
                                'child': testFileA['lfn']})

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        self.assertEqual(parentFile in goldenFiles, True,
                         "ERROR: Unknown parent file")
        goldenFiles.remove(parentFile)

    self.assertEqual(len(goldenFiles), 0,
                     "ERROR: Some parents are missing")

def testSetLocationTransaction(self):
    """
    _testSetLocationTransaction_

    Create a file at specific locations and commit everything to the
    database.  Reload the file from the database and verify that the
    locations are correct.  Roll back the database transaction and once
    again reload the file.  Verify that the original locations are back.
    """
    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileA.setLocation(["se1.fnal.gov"])

    myThread = threading.currentThread()
    myThread.transaction.begin()

    testFileA.setLocation(["se1.cern.ch"])
    testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                          immediateSave=False)

    testFileB = File(id=testFileA["id"])
    testFileB.loadData()

    goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

    for location in testFileB["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"

    myThread.transaction.rollback()
    testFileB.loadData()

    goldenLocations = ["se1.fnal.gov"]

    for location in testFileB["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"
    return

def testLocationsConstructor(self):
    """
    _testLocationsConstructor_

    Test to make sure that locations passed into the File() constructor
    are loaded from and saved to the database correctly.  Also test to
    make sure that the class behaves well when the location is passed in
    as a single string instead of a set.
    """
    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1}, locations=set(["se1.fnal.gov"]))
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileB = File(lfn="/this/is/a/lfn2", size=1024, events=10,
                     checksums={'cksum': 1}, locations="se1.fnal.gov")
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    testFileC = File(id=testFileA["id"])
    testFileC.loadData()

    goldenLocations = ["se1.fnal.gov"]
    for location in testFileC["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"

    testFileC = File(id=testFileB["id"])
    testFileC.loadData()

    goldenLocations = ["se1.fnal.gov"]
    for location in testFileC["locations"]:
        assert location in goldenLocations, \
            "ERROR: Unknown file location"
        goldenLocations.remove(location)

    assert len(goldenLocations) == 0, \
        "ERROR: Some locations are missing"
    return

def testLoadData(self):
    """
    _testLoadData_

    Test the loading of all data from a file, including run/lumi
    associations, location information and parentage information.
    """
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))
    testFileParentA.create()
    testFileParentB.create()

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1})
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileA.setLocation(se="se1.fnal.gov", immediateSave=False)
    testFileA.setLocation(se="se1.cern.ch", immediateSave=False)
    testFileA.addParent("/this/is/a/parent/lfnA")
    testFileA.addParent("/this/is/a/parent/lfnB")
    testFileA.updateLocations()

    testFileB = File(lfn=testFileA["lfn"])
    testFileB.loadData(parentage=1)
    testFileC = File(id=testFileA["id"])
    testFileC.loadData(parentage=1)

    assert testFileA == testFileB, \
        "ERROR: File load by LFN didn't work"
    assert testFileA == testFileC, \
        "ERROR: File load by ID didn't work"

    testFileA.delete()
    testFileParentA.delete()
    testFileParentB.delete()
    return

def testDataStructsFile(self):
    """
    _testDataStructsFile_

    Tests our ability to create a WMBS file from a DataStructs File and
    vice versa.
    """
    myThread = threading.currentThread()

    testLFN = "lfn1"
    testSize = 1024
    testEvents = 100
    testCksum = {"cksum": '1'}
    testParents = set(["lfn2"])
    testRun = Run(1, *[45])
    testSE = "se1.cern.ch"

    parentFile = File(lfn="lfn2")
    parentFile.create()

    testFile = File()

    inputFile = WMFile(lfn=testLFN, size=testSize, events=testEvents,
                       checksums=testCksum, parents=testParents)
    inputFile.addRun(testRun)
    inputFile.setLocation(se=testSE)

    testFile.loadFromDataStructsFile(file=inputFile)
    testFile.create()
    testFile.save()

    loadFile = File(lfn="lfn1")
    loadFile.loadData(parentage=1)

    self.assertEqual(loadFile['size'], testSize)
    self.assertEqual(loadFile['events'], testEvents)
    self.assertEqual(loadFile['checksums'], testCksum)
    self.assertEqual(loadFile['locations'], set([testSE]))
    #self.assertEqual(loadFile['parents'].pop()['lfn'], 'lfn2')

    wmFile = loadFile.returnDataStructsFile()
    self.assertEqual(wmFile == inputFile, True)
    return

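# The round trip exercised above, reduced to a sketch (values hypothetical):
#
#     wmFile = WMFile(lfn="lfn1", size=1024, events=100)
#     wmFile.setLocation(se="se1.cern.ch")
#     wmbsFile = File()
#     wmbsFile.loadFromDataStructsFile(file=wmFile)   # DataStructs -> WMBS
#     wmbsFile.create()
#     roundTrip = wmbsFile.returnDataStructsFile()    # WMBS -> DataStructs
#
# As the test asserts, a file loaded back out of WMBS with loadData() should
# compare equal to the DataStructs file it was built from.
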
def testAddChecksumsByLFN(self):
    """
    _testAddChecksumsByLFN_

    Tests for adding checksums by DAO by LFN
    """
    testWorkflow = Workflow(spec="hello", owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    type="Processing",
                                    split_algo="FileBased")
    testSubscription.create()
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.AddChecksumByLFN")
    binds = [{"lfn": testFileA["lfn"], "cktype": "cksum", "cksum": 101},
             {"lfn": testFileA["lfn"], "cktype": "adler32", "cksum": 201},
             {"lfn": testFileB["lfn"], "cktype": "cksum", "cksum": 101}]
    parentAction.execute(bulkList=binds)

    testFileC = File(id=testFileA["id"])
    testFileC.loadData()
    testFileD = File(id=testFileB["id"])
    testFileD.loadData()

    self.assertEqual(testFileC["checksums"], {"adler32": "201", "cksum": "101"})
    self.assertEqual(testFileD["checksums"], {"cksum": "101"})
    return

def testCreateWithParent(self):
    """
    Test passing a parents argument to file creation.  If a parent file
    does not exist, check that it creates the file and sets the parentage.
    """
    # create the parent file before it is added to the child file
    testFileParentA = File(lfn="/this/is/a/parent/lfnA", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentA.addRun(Run(1, *[45]))
    testFileParentA.create()

    # don't create the parent file before it is added to the child file
    testFileParentB = File(lfn="/this/is/a/parent/lfnB", size=1024,
                           events=20, checksums={'cksum': 1})
    testFileParentB.addRun(Run(1, *[45]))

    testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10,
                     checksums={'cksum': 1},
                     parents=[testFileParentA, testFileParentB])
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()

    testFileB = File(id=testFileA["id"])
    testFileB.loadData(parentage=1)

    goldenFiles = [testFileParentA, testFileParentB]
    for parentFile in testFileB["parents"]:
        assert parentFile in goldenFiles, \
            "ERROR: Unknown parent file"
        goldenFiles.remove(parentFile)

    assert len(goldenFiles) == 0, \
        "ERROR: Some parents are missing"

def test_SetLocationsForWorkQueue(self):
    """
    _SetLocationsForWorkQueue_

    Test the code that sets locations for the WorkQueue.
    This is more complicated than it seems.
    """
    action = self.daofactory(classname="Files.SetLocationForWorkQueue")

    testFile = File(lfn="myLFN", size=1024, events=10,
                    checksums={'cksum': 1111})
    testFile.create()

    tFile1 = File(lfn="myLFN")
    tFile1.loadData()
    locations = tFile1.getLocations()
    self.assertEqual(locations, [])

    binds = [{'lfn': 'myLFN', 'location': 'se1.cern.ch'}]
    action.execute(lfns=['myLFN'], locations=binds)

    tFile1.loadData()
    locations = tFile1.getLocations()
    self.assertEqual(locations, ['se1.cern.ch'])

    binds = [{'lfn': 'myLFN', 'location': 'se1.fnal.gov'}]
    action.execute(lfns=['myLFN'], locations=binds)

    tFile1.loadData()
    locations = tFile1.getLocations()
    self.assertEqual(locations, ['se1.fnal.gov'])
    return

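# Worth noting from the assertions above: Files.SetLocationForWorkQueue
# replaces a file's location list rather than appending to it -- after the
# second execute() the file is located at se1.fnal.gov only.  The call shape
# (values hypothetical):
#
#     binds = [{'lfn': 'myLFN', 'location': 'se1.fnal.gov'}]
#     action.execute(lfns=['myLFN'], locations=binds)
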
if not newfile.exists():
    newfile.create()
    filesetToProcess.addFile(newfile)
    filesetToProcess.setLastUpdate(int(time.time()))
    filesetToProcess.commit()

    runSet = set()
    runSet.add(Run(lumi['RunNumber'], *[lumi['LumiSectionNumber']]))
    newfile.addRunSet(runSet)
else:
    newfile.loadData()

    listFile = fileInFileset.execute(filesetToProcess.id)
    if {'fileid': newfile['id']} not in listFile:
        filesetToProcess.addFile(newfile)
        filesetToProcess.setLastUpdate(int(time.time()))
        filesetToProcess.commit()

    val = 0
    for run in newfile['runs']:
        if lumi['RunNumber'] == run.run:

class T0ASTFile(dict):
    """
    _T0ASTFile_

    A dictionary based object meant to represent a file in T0AST.  It
    contains the following keys:
      ID
      STREAM
      RUN_ID
      FILESIZE
      EVENTS
      CKSUM
      LFN
      DATASET_ID
      DATA_TIER
      TYPE
      STATUS
      EXPORT_STATUS
      STREAM_ID
      LUMI_ID
      LUMI_LIST
      PARENT_LIST
      PSET_HASH
      BRANCH_HASH
      LOCATIONS
    """
    def __init__(self, wmbsFile=None, **args):
        """
        ___init___

        Initialize all attributes.
        """
        dict.__init__(self)
        self.runLumi = None

        # place holder for the list of parent (repacked) file names;
        # only used for the reco type, for repacked this should be empty
        self.parentList = None
        self.parentIDList = None

        # this is the dataset_path_ID in the data table, not the primary
        # dataset id
        self.datasetPathID = None

        self.setdefault("RUN_ID", None)
        self.setdefault("STREAM", None)
        self.setdefault("DATASET_ID", None)
        self.setdefault("PRIMARY_DATASET", None)
        self.setdefault("PROCESSED_DATASET", None)
        self.setdefault("DATA_TIER", None)
        self.setdefault("BLOCK_ID", None)
        self.setdefault("STREAM_ID", None)

        # currently using the WM Run structure here but need to use the
        # lumi structure; place holder for a list of lumi data structs
        #self.setdefault("LUMI_LIST", [])

        # place holder for the PSet hash, which comes from the Job Report
        self.setdefault("PSET_HASH", None)
        self.setdefault("BRANCH_HASH", None)
        self.setdefault("LOCATIONS", None)

        self.wmbsFile = wmbsFile
        if wmbsFile is None:
            self.setdefault("ID", None)
            self.setdefault("FILESIZE", None)
            self.setdefault("EVENTS", None)
            self.setdefault("FIRST_EVENT", 0)
            self.setdefault("LAST_EVENT", 0)
            self.setdefault("CKSUM", None)
            self.setdefault("LFN", None)
            # this is the primary dataset ID
            self.update(args)
        else:
            self["ID"] = wmbsFile["id"]
            self["FILESIZE"] = wmbsFile["size"]
            self["CKSUM"] = wmbsFile["cksum"]
            self["EVENTS"] = wmbsFile["events"]
            self["FIRST_EVENT"] = wmbsFile["first_event"]
            self["LAST_EVENT"] = wmbsFile["last_event"]
            self["LFN"] = wmbsFile["lfn"]

    def convertToWMBSFile(self):
        """
        __convertToWMBSFile__

        Return a WMBSFile instance converted from this T0AST file.
        """
        if self.wmbsFile is None:
            # hack: make sure first_event/last_event are not None;
            # if they are, use 0 instead
            if self["FIRST_EVENT"] is None:
                self["FIRST_EVENT"] = 0
            if self["LAST_EVENT"] is None:
                self["LAST_EVENT"] = 0

            self.wmbsFile = WMBSFile(lfn=self["LFN"],
                                     size=self["FILESIZE"],
                                     events=self["EVENTS"],
                                     cksum=self["CKSUM"],
                                     first_event=self["FIRST_EVENT"],
                                     last_event=self["LAST_EVENT"],
                                     locations=self["LOCATIONS"])
            self.wmbsFile.create()
            self["ID"] = self.wmbsFile["id"]

        return self.wmbsFile

    def getParentList(self, dataType='lfn', stream="Normal"):
        """
        _getParentList_
        """
        if self["DATA_TIER"] == "RAW":
            return []

        #TODO: place holder for correct express stream handling
        if stream == "Express":
            # TODO: get using the lumi comparison
            # For now return an empty parent list
            return []

        self.convertToWMBSFile()
        if dataType == "id":
            parents = self.parentIDList
        elif dataType == "lfn":
            parents = self.parentList
        else:
            raise Exception("Unknown Type")

        if parents is None:
            # this will return the right parents for both RECO -> RAW and
            # RAW -> [] files, except the big files merged directly
            if self["DATA_TIER"] == "ALCARECO" and alcaNewSelection():
                parents = self.wmbsFile.getAncestors(level=3, type=dataType)
            else:
                parents = self.wmbsFile.getAncestors(level=2, type=dataType)

            # cache the result to prevent this expensive call being made
            # over and over again
            if dataType == "id":
                self.parentIDList = parents
            elif dataType == "lfn":
                self.parentList = parents

        return parents

    def getLumiList(self):
        """
        _getLumiList_
        """
        self.convertToWMBSFile()
        if self.runLumi is None:
            if len(self.wmbsFile["runs"]) == 0:
                self.wmbsFile.loadData()
            self.runLumi = list(self.wmbsFile["runs"])[0]
            self["RUN_ID"] = self.runLumi.run
        return self.runLumi

    def getRunID(self):
        """
        _getRunID_
        """
        self.convertToWMBSFile()
        if self["RUN_ID"] is None:
            if len(self.wmbsFile["runs"]) == 0:
                self.wmbsFile.loadData()
            self.runLumi = list(self.wmbsFile["runs"])[0]
            self["RUN_ID"] = self.runLumi.run
        return self["RUN_ID"]

    def getDatasetPathID(self):
        """
        _getDatasetPathID_

        To do: this needs a database connection to get the value; it needs
        to be modified like the wmbs wrapper class.
        """
        return self.datasetPathID

def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    global LOCK

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv('DIALECT'), os.getenv("DBSOCK"))

    myThread = threading.currentThread()

    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationNew = daofactory(classname="Locations.New")
    getFileLoc = daofactory(classname="Files.GetLocation")

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % (filesetToProcess.name).split(":")[0])

    startRun = (filesetToProcess.name).split(":")[3]
    fileType = (filesetToProcess.name).split(":")[2]

    # url builder
    primaryDataset = ((filesetToProcess.name).split(":")[0]).split('/')[1]
    processedDataset = ((filesetToProcess.name).split(":")[0]).split('/')[2]
    dataTier = (((filesetToProcess.name).split(":")[0]).split('/')[3]).split('-')[0]

    # First call to the T0 db for this fileset
    # Here add a test for the closed fileset
    LASTIME = filesetToProcess.lastUpdate

    url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
          (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get(url + "/" + "?return_type=text/json%2Bdas")
            newFilesList = requestResult[0]["results"]
        except:
            logging.debug("T0Reader call error...")
            if tries == self.maxRetries:
                return
            else:
                tries += 1
                continue

        logging.debug("T0ASTRun queries done ...")
        now = time.time()
        filesetToProcess.last_update = now
        LASTIME = int(newFilesList['end_time']) + 1
        break

    # process all files
    if len(newFilesList['files']):
        LOCK.acquire()

        try:
            locationNew.execute(siteName="caf.cern.ch", seName="caf.cern.ch")
        except Exception as e:
            logging.debug("Error when adding new location...")
            logging.debug(e)
            logging.debug(format_exc())

        for files in newFilesList['files']:
            # Assume parents aren't asked
            newfile = File(str(files['lfn']), size=files['file_size'],
                           events=files['events'])

            try:
                if not newfile.exists():
                    newfile.create()
                else:
                    newfile.loadData()

                # Add run test if it already exists
                for run in files['runs']:
                    if startRun != 'None' and int(startRun) <= int(run):
                        # ToDo: Distinguish between
                        # filesetA-RunX and filesetA-Run[0-9]*
                        filesetRunName = \
                            (((filesetToProcess.name).split(':')[0]).split('/')[0]) + '/' + \
                            (((filesetToProcess.name).split(':')[0]).split('/')[1]) + '/' + \
                            (((filesetToProcess.name).split(':')[0]).split('/')[2]) + '/' + \
                            ((((filesetToProcess.name).split(':')[0]).split('/')[3]).split('-')[0]) + \
                            '-' + 'Run' + str(run) + ":" + \
                            ":".join((filesetToProcess.name).split(':')[1:])
                        filesetRun = Fileset(name=filesetRunName)

                        if not filesetRun.exists():
                            filesetRun.create()
                        else:
                            filesetRun.loadData()

                        # Add a test for runs already there
                        # (for a growing dataset) -
                        # to support files with different runs and lumis
                        if not newfile['runs']:
                            runSet = set()
                            runSet.add(Run(run, *files['runs'][run]))
                            newfile.addRunSet(runSet)

                        fileLoc = getFileLoc.execute(file=files['lfn'])
                        if 'caf.cern.ch' not in fileLoc:
                            newfile.setLocation("caf.cern.ch")

                        filesetRun.addFile(newfile)
                        logging.debug("new file created/loaded added by T0ASTRun...")
                        filesetRun.commit()

            except Exception as e:
                logging.debug("Error when adding new files in T0ASTRun...")
                logging.debug(e)
                logging.debug(format_exc())

        filesetToProcess.setLastUpdate(int(newFilesList['end_time']) + 1)
        filesetToProcess.commit()
        LOCK.release()

    else:
        logging.debug("nothing to do...")
        # For a re-opened or empty fileset, try until the purge time
        if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

    if LASTIME:
        myRequester = JSONRequests(url="vocms52.cern.ch:8889")
        requestResult = myRequester.get("/tier0/runs")

        for listRun in requestResult[0]:
            if int(startRun) <= int(listRun['run']):
                if listRun['status'] == 'CloseOutExport' or \
                   listRun['status'] == 'Complete' or \
                   listRun['status'] == 'CloseOutT1Skimming':

                    closeFilesetName = \
                        (((filesetToProcess.name).split(':')[0]).split('/')[0]) + '/' + \
                        (((filesetToProcess.name).split(':')[0]).split('/')[1]) + '/' + \
                        (((filesetToProcess.name).split(':')[0]).split('/')[2]) + '/' + \
                        ((((filesetToProcess.name).split(':')[0]).split('/')[3]).split('-')[0]) + \
                        '-' + 'Run' + str(listRun['run']) + ":" + \
                        ":".join((filesetToProcess.name).split(':')[1:])
                    closeFileset = Fileset(name=closeFilesetName)

                    if closeFileset.exists() != False:
                        closeFileset = Fileset(id=closeFileset.exists())
                        closeFileset.loadData()
                        if closeFileset.open == True:
                            closeFileset.markOpen(False)

    # Commit the fileset
    filesetToProcess.commit()

    # Test the purge
    logging.debug("Test purge in T0ASTRun ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")

    filesetToProcess.commit()

def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    global LOCK

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv('DIALECT'), os.getenv("DBSOCK"))

    myThread = threading.currentThread()

    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationNew = daofactory(classname="Locations.New")
    getFileLoc = daofactory(classname="Files.GetLocation")
    fileInFileset = daofactory(classname="Files.InFileset")

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % (filesetToProcess.name).split(":")[0])

    # Get the start run if asked
    startRun = (filesetToProcess.name).split(":")[3]
    fileType = (filesetToProcess.name).split(":")[2]

    LASTIME = filesetToProcess.lastUpdate

    # url builder
    primaryDataset = ((filesetToProcess.name).split(":")[0]).split('/')[1]
    processedDataset = ((filesetToProcess.name).split(":")[0]).split('/')[2]
    dataTier = ((filesetToProcess.name).split(":")[0]).split('/')[3]

    url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
          (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get(url + "/" + "?return_type=text/json%2Bdas")
            newFilesList = requestResult[0]["results"]
        except:
            logging.debug("T0Reader call error...")
            if tries == self.maxRetries:
                return
            else:
                tries += 1
                continue

        logging.debug("T0 queries done ...")
        now = time.time()
        LASTIME = int(newFilesList['end_time']) + 1
        break

    # process all files
    if len(newFilesList['files']):
        try:
            locationNew.execute(siteName="caf.cern.ch", seName="caf.cern.ch")
        except Exception as e:
            logging.debug("Error when adding new location...")
            logging.debug(e)
            logging.debug(format_exc())

        for files in newFilesList['files']:
            # Assume parents aren't asked
            newfile = File(str(files['lfn']), size=files['file_size'],
                           events=files['events'])

            try:
                LOCK.acquire()

                if not newfile.exists():
                    newfile.create()
                    for run in files['runs']:
                        runSet = set()
                        runSet.add(Run(run, *files['runs'][run]))
                        newfile.addRunSet(runSet)
                else:
                    newfile.loadData()

                fileLoc = getFileLoc.execute(file=files['lfn'])
                if 'caf.cern.ch' not in fileLoc:
                    newfile.setLocation("caf.cern.ch")
                # else:
                #     logging.debug("File already associated to %s" % fileLoc)

                LOCK.release()

                if len(newfile["runs"]):
                    val = 0
                    for run in newfile['runs']:
                        if run.run < int(startRun):
                            val = 1
                            break

                    if not val:
                        listFile = fileInFileset.execute(filesetToProcess.id)
                        if {'fileid': newfile['id']} not in listFile:
                            filesetToProcess.addFile(newfile)
                            filesetToProcess.setLastUpdate(int(newFilesList['end_time']) + 1)
                            filesetToProcess.commit()
                            logging.debug("new file created/loaded added by T0AST...")

            except Exception as e:
                logging.debug("Error when adding new files in T0AST...")
                logging.debug(e)
                logging.debug(format_exc())
                LOCK.release()

        filesetToProcess.commit()

    else:
        logging.debug("nothing to do in T0AST...")
        # For a re-opened or empty fileset,
        # try until the purge time is reached
        if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

    # Commit the fileset
    logging.debug("Test purge in T0AST ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")

    filesetToProcess.commit()

def verifyFileMetaData(self, jobID, fwkJobReportFiles):
    """
    _verifyFileMetaData_

    Verify that all the files that were output by a job made it into WMBS
    correctly.  Compare the contents of WMBS to the files in the framework
    job report.

    Note that fwkJobReportFiles is a list of DataStructs File objects.
    """
    testJob = Job(id=jobID)
    testJob.loadData()

    inputLFNs = []
    for inputFile in testJob["input_files"]:
        inputLFNs.append(inputFile["lfn"])

    for fwkJobReportFile in fwkJobReportFiles:
        outputFile = File(lfn=fwkJobReportFile["lfn"])
        outputFile.loadData(parentage=1)

        assert outputFile["events"] == int(fwkJobReportFile["events"]), \
            "Error: Output file has wrong events: %s, %s" % \
            (outputFile["events"], fwkJobReportFile["events"])
        assert outputFile["size"] == int(fwkJobReportFile["size"]), \
            "Error: Output file has wrong size: %s, %s" % \
            (outputFile["size"], fwkJobReportFile["size"])

        for ckType in fwkJobReportFile["checksums"]:
            assert ckType in outputFile["checksums"], \
                "Error: Output file is missing checksums: %s" % ckType
            assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \
                "Error: Checksums don't match."

        assert len(fwkJobReportFile["checksums"]) == len(outputFile["checksums"]), \
            "Error: Wrong number of checksums."

        jobType = self.getJobTypeAction.execute(jobID=jobID)
        if jobType == "Merge":
            assert str(outputFile["merged"]) == "True", \
                "Error: Merge jobs should output merged files."
        else:
            assert outputFile["merged"] == fwkJobReportFile["merged"], \
                "Error: Output file merged output is wrong: %s, %s" % \
                (outputFile["merged"], fwkJobReportFile["merged"])

        assert len(outputFile["locations"]) == 1, \
            "Error: outputfile should have one location: %s" % outputFile["locations"]
        assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \
            "Error: wrong location for file."

        assert len(outputFile["parents"]) == len(inputLFNs), \
            "Error: Output file has wrong number of parents."
        for outputParent in outputFile["parents"]:
            assert outputParent["lfn"] in inputLFNs, \
                "Error: Unknown parent file: %s" % outputParent["lfn"]

        fwjrRuns = {}
        for run in fwkJobReportFile["runs"]:
            fwjrRuns[run.run] = run.lumis

        for run in outputFile["runs"]:
            assert run.run in fwjrRuns, \
                "Error: Extra run in output: %s" % run.run
            for lumi in run:
                assert lumi in fwjrRuns[run.run], \
                    "Error: Extra lumi: %s" % lumi
                fwjrRuns[run.run].remove(lumi)

            if len(fwjrRuns[run.run]) == 0:
                del fwjrRuns[run.run]

        assert len(fwjrRuns) == 0, \
            "Error: Missing runs, lumis: %s" % fwjrRuns

        testJobGroup = JobGroup(id=testJob["jobgroup"])
        testJobGroup.loadData()
        jobGroupFileset = testJobGroup.output
        jobGroupFileset.loadData()

        assert outputFile["id"] in jobGroupFileset.getFiles(type="id"), \
            "Error: output file not in jobgroup fileset."

        if testJob["mask"]["FirstEvent"] is None:
            assert outputFile["first_event"] == 0, \
                "Error: first event not set correctly: 0, %s" % \
                outputFile["first_event"]
        else:
            assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \
                "Error: first event not set correctly: %s, %s" % \
                (testJob["mask"]["FirstEvent"], outputFile["first_event"])

    return

def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv("DIALECT"), os.getenv("DBSOCK"))

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    lastFileset = daofactory(classname="Fileset.ListFilesetByTask")
    lastWorkflow = daofactory(classname="Workflow.LoadFromTask")
    subsRun = daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
    successJob = daofactory(classname="Subscriptions.SucceededJobs")
    allJob = daofactory(classname="Subscriptions.Jobs")
    fileInFileset = daofactory(classname="Files.InFileset")

    # Get the start run if asked
    startRun = (filesetToProcess.name).split(":")[3]

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % (filesetToProcess.name).split(":")[0])

    fileType = (filesetToProcess.name).split(":")[2]
    crabTask = filesetToProcess.name.split(":")[0]
    LASTIME = filesetToProcess.lastUpdate

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get("/tier0/runs")
        except Exception:
            logging.debug("T0Reader call error...")
            if tries == self.maxRetries:
                return
            else:
                tries += 1
                continue

        logging.debug("T0ASTRunChain feeder queries done ...")
        now = time.time()
        break

    for listRun in requestResult[0]:
        if startRun != 'None' and int(listRun['run']) >= int(startRun):
            if listRun['status'] == 'CloseOutExport' or \
               listRun['status'] == 'Complete' or \
               listRun['status'] == 'CloseOutT1Skimming':

                crabWorkflow = lastWorkflow.execute(task=crabTask)
                crabFileset = lastFileset.execute(task=crabTask)
                crabrunFileset = Fileset(
                    name=crabFileset[0]["name"].split(':')[0].split('-Run')[0] +
                    '-Run' + str(listRun['run']) + ":" +
                    ":".join(crabFileset[0]['name'].split(':')[1:]))

                if crabrunFileset.exists() > 0:
                    crabrunFileset.load()
                    currSubs = subsRun.execute(crabrunFileset.id,
                                               crabWorkflow[0]['id'])

                    if currSubs:
                        listsuccessJob = successJob.execute(subscription=currSubs['id'])
                        listallJob = allJob.execute(subscription=currSubs['id'])

                        if len(listsuccessJob) == len(listallJob):
                            for currid in listsuccessJob:
                                currjob = Job(id=currid)
                                currjob.load()

                                logging.debug("Reading FJR %s" % currjob['fwjr_path'])
                                jobReport = readJobReport(currjob['fwjr_path'])

                                if len(jobReport) > 0:
                                    if jobReport[0].files:
                                        for newFile in jobReport[0].files:
                                            logging.debug("Output path %s" % newFile['LFN'])
                                            newFileToAdd = File(lfn=newFile['LFN'],
                                                                locations='caf.cern.ch')

                                            LOCK.acquire()
                                            if not newFileToAdd.exists():
                                                newFileToAdd.create()
                                            else:
                                                newFileToAdd.loadData()
                                            LOCK.release()

                                            listFile = fileInFileset.execute(filesetToProcess.id)
                                            if {'fileid': newFileToAdd['id']} not in listFile:
                                                filesetToProcess.addFile(newFileToAdd)
                                                filesetToProcess.setLastUpdate(now)
                                                filesetToProcess.commit()
                                                logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                    elif jobReport[0].analysisFiles:
                                        for newFile in jobReport[0].analysisFiles:
                                            logging.debug("Output path %s " % newFile['LFN'])
                                            newFileToAdd = File(lfn=newFile['LFN'],
                                                                locations='caf.cern.ch')

                                            LOCK.acquire()
                                            if not newFileToAdd.exists():
                                                newFileToAdd.create()
                                            else:
                                                newFileToAdd.loadData()
                                            LOCK.release()

                                            listFile = fileInFileset.execute(filesetToProcess.id)
                                            if {'fileid': newFileToAdd['id']} not in listFile:
                                                logging.debug("%s loaded and added by T0ASTRunChain" % newFile['LFN'])
                                                filesetToProcess.addFile(newFileToAdd)
                                                filesetToProcess.setLastUpdate(now)
                                                filesetToProcess.commit()
                                                logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                else:
                                    break  # Missed FJR - try next time

    # Commit the fileset
    logging.debug("Test purge in T0ASTRunChain ...")
    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    # For a re-opened or empty fileset, try until the purge time
    if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
        filesetToProcess.setLastUpdate(time.time())
        filesetToProcess.commit()

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")
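# Illustrative only: the colon-separated fileset name layout this feeder
# assumes, as implied by the split(":") indexing above (index 0 is the CRAB
# task / dataset path, index 2 the file type, index 3 the start run).  The
# concrete values below are made up for the example.
_EXAMPLE_FILESET_NAME = "/Primary/Processed-v1/RECO:label:RAW:142000"
assert _EXAMPLE_FILESET_NAME.split(":")[0] == "/Primary/Processed-v1/RECO"
assert _EXAMPLE_FILESET_NAME.split(":")[2] == "RAW"
assert _EXAMPLE_FILESET_NAME.split(":")[3] == "142000"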
def testAddChecksumsByLFN(self):
    """
    _testAddChecksumsByLFN_

    Test adding checksums by LFN via the AddChecksumByLFN DAO.
    """
    testWorkflow = Workflow(spec='hello', owner="mnorman",
                            name="wf001", task="basicWorkload/Production")
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow,
                                    type="Processing", split_algo="FileBased")
    testSubscription.create()
    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    testFileA.addRun(Run(1, *[45]))
    testFileA.create()
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    testFileB.addRun(Run(1, *[45]))
    testFileB.create()

    testJobA = Job()
    testJobA.create(group=testJobGroup)
    testJobA.associateFiles()

    parentAction = self.daofactory(classname="Files.AddChecksumByLFN")
    binds = [{'lfn': testFileA['lfn'], 'cktype': 'cksum', 'cksum': 101},
             {'lfn': testFileA['lfn'], 'cktype': 'adler32', 'cksum': 201},
             {'lfn': testFileB['lfn'], 'cktype': 'cksum', 'cksum': 101}]
    parentAction.execute(bulkList=binds)

    testFileC = File(id=testFileA["id"])
    testFileC.loadData()
    testFileD = File(id=testFileB["id"])
    testFileD.loadData()

    self.assertEqual(testFileC['checksums'], {'adler32': '201', 'cksum': '101'})
    self.assertEqual(testFileD['checksums'], {'cksum': '101'})

    return
def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    global LOCK

    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv("DIALECT"), os.getenv("DBSOCK"))

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationNew = daofactory(classname="Locations.New")
    getFileLoc = daofactory(classname="Files.GetLocation")
    fileInFileset = daofactory(classname="Files.InFileset")

    logging.debug("the T0Feeder is processing %s" % filesetToProcess.name)
    logging.debug("the fileset name %s" % (filesetToProcess.name).split(":")[0])

    # Get the start run if asked
    startRun = (filesetToProcess.name).split(":")[3]
    fileType = (filesetToProcess.name).split(":")[2]
    LASTIME = filesetToProcess.lastUpdate

    # url builder
    primaryDataset = ((filesetToProcess.name).split(":")[0]).split("/")[1]
    processedDataset = ((filesetToProcess.name).split(":")[0]).split("/")[2]
    dataTier = ((filesetToProcess.name).split(":")[0]).split("/")[3]
    url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
        (fileType, LASTIME, primaryDataset, processedDataset, dataTier)

    tries = 1
    while True:
        try:
            myRequester = JSONRequests(url="vocms52.cern.ch:8889")
            requestResult = myRequester.get(url + "/" + "?return_type=text/json%2Bdas")
            newFilesList = requestResult[0]["results"]
        except Exception:
            logging.debug("T0Reader call error...")
            if tries == self.maxRetries:
                return
            else:
                tries += 1
                continue

        logging.debug("T0 queries done ...")
        now = time.time()
        LASTIME = int(newFilesList["end_time"]) + 1
        break

    # process all files
    if len(newFilesList["files"]):
        try:
            locationNew.execute(siteName="caf.cern.ch", seName="caf.cern.ch")
        except Exception as e:
            logging.debug("Error when adding new location...")
            logging.debug(e)
            logging.debug(format_exc())

        for files in newFilesList["files"]:
            # Assume parents aren't asked
            newfile = File(str(files["lfn"]), size=files["file_size"],
                           events=files["events"])

            try:
                LOCK.acquire()

                if not newfile.exists():
                    newfile.create()
                    for run in files["runs"]:
                        runSet = set()
                        runSet.add(Run(run, *files["runs"][run]))
                        newfile.addRunSet(runSet)
                else:
                    newfile.loadData()

                fileLoc = getFileLoc.execute(file=files["lfn"])
                if "caf.cern.ch" not in fileLoc:
                    newfile.setLocation("caf.cern.ch")
                # else:
                #     logging.debug("File already associated to %s" % fileLoc)

                LOCK.release()

                if len(newfile["runs"]):
                    val = 0
                    for run in newfile["runs"]:
                        if run.run < int(startRun):
                            val = 1
                            break

                    if not val:
                        listFile = fileInFileset.execute(filesetToProcess.id)
                        if {"fileid": newfile["id"]} not in listFile:
                            filesetToProcess.addFile(newfile)
                            filesetToProcess.setLastUpdate(int(newFilesList["end_time"]) + 1)
                            filesetToProcess.commit()
                            logging.debug("new file created/loaded and added by T0AST...")

            except Exception as e:
                logging.debug("Error when adding new files in T0AST...")
                logging.debug(e)
                logging.debug(format_exc())
                LOCK.release()
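# Illustrative only: the shape of the DAS-style JSON payload this feeder
# expects back from the Tier-0 "listfilesoverinterval" call, as implied by
# the dictionary accesses above.  All concrete values are hypothetical.
_EXAMPLE_T0_PAYLOAD = {
    "results": {
        "end_time": 1291900000,                   # unix time of interval end
        "files": [
            {"lfn": "/store/data/example/file.root",
             "file_size": 1024,
             "events": 10,
             "runs": {142000: [1, 2, 3]}},        # run number -> lumi sections
        ],
    },
}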
def __call__(self, filesetToProcess):
    """
    The algorithm itself
    """
    # Get configuration
    initObj = WMInit()
    initObj.setLogging()
    initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                  os.getenv("DIALECT"), os.getenv("DBSOCK"))

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationNew = daofactory(classname="Locations.New")
    getFileLoc = daofactory(classname="Files.GetLocation")
    fileInFileset = daofactory(classname="Files.InFileset")

    logging.debug("DBSFeeder is processing %s" % filesetToProcess.name)
    logging.debug("the filesetBase name %s" % (filesetToProcess.name).split(":")[0])

    LASTIME = filesetToProcess.lastUpdate

    # Get the start run if asked
    startRun = (filesetToProcess.name).split(":")[3]

    # get list of files
    tries = 1
    while True:
        try:
            blocks = self.dbsReader.getFiles((filesetToProcess.name).split(":")[0])
            now = time.time()
            logging.debug("DBS queries done ...")
            break
        except DBSReaderError as ex:
            logging.error("DBS error: %s, cannot get files for %s" %
                          (str(ex), filesetToProcess.name))
            # Close fileset
            filesetToProcess.markOpen(False)
            return
        # connection error, retry
        except DbsConnectionError as ex:
            logging.error("Unable to connect to DBS, retrying: " + str(ex))
            if tries > self.connectionAttempts:  # too many errors - bail out
                return
            tries = tries + 1

    # check for empty datasets
    if blocks == {}:
        logging.debug("DBS: Empty blocks - %s" % filesetToProcess.name)
        return filesetToProcess

    # get all file blocks
    blockList = blocks.keys()

    # process all file blocks
    for fileBlock in blockList:
        seList = blocks[fileBlock]['StorageElements']

        # add files for non blocked SEs
        if seList is None or seList == []:
            logging.info("fileblock %s - no SEs associated" % fileBlock)
            continue
        else:
            for loc in seList:
                locationNew.execute(siteName=loc, seName=loc)

        for files in blocks[fileBlock]['Files']:
            if startRun != 'None':
                if len(files['LumiList']):
                    for lumi in files['LumiList']:
                        if int(startRun) <= int(lumi['RunNumber']):
                            newfile = File(files['LogicalFileName'],
                                           size=files['FileSize'],
                                           events=files['NumberOfEvents'])

                            LOCK.acquire()
                            if not newfile.exists():
                                newfile.create()
                                filesetToProcess.addFile(newfile)
                                filesetToProcess.setLastUpdate(int(time.time()))
                                filesetToProcess.commit()

                                runSet = set()
                                runSet.add(Run(lumi['RunNumber'],
                                               *[lumi['LumiSectionNumber']]))
                                newfile.addRunSet(runSet)
                            else:
                                newfile.loadData()

                                listFile = fileInFileset.execute(filesetToProcess.id)
                                if {'fileid': newfile['id']} not in listFile:
                                    filesetToProcess.addFile(newfile)
                                    filesetToProcess.setLastUpdate(int(time.time()))
                                    filesetToProcess.commit()

                                val = 0
                                for run in newfile['runs']:
                                    if lumi['RunNumber'] == run.run:
                                        val = 1
                                        break

                                if not val:
                                    runSet = set()
                                    runSet.add(Run(lumi['RunNumber'],
                                                   *[lumi['LumiSectionNumber']]))
                                    newfile.addRunSet(runSet)

                            fileLoc = getFileLoc.execute(file=files['LogicalFileName'])
                            if fileLoc:
                                for loc in seList:
                                    if loc not in fileLoc:
                                        newfile.setLocation(loc)
                            else:
                                newfile.setLocation(seList)
                            LOCK.release()

            else:
                # Assume parents and LumiSection aren't asked
                newfile = File(files['LogicalFileName'],
                               size=files['FileSize'],
                               events=files['NumberOfEvents'])

                LOCK.acquire()
                if not newfile.exists():
                    newfile.create()

                    # Update fileset last update parameter
                    filesetToProcess.addFile(newfile)
                    logging.debug("new file created and added by DBS")
                    filesetToProcess.setLastUpdate(int(time.time()))
                    filesetToProcess.commit()
                else:
                    newfile.loadData()

                    listFile = fileInFileset.execute(filesetToProcess.id)
                    if {'fileid': newfile['id']} not in listFile:
                        filesetToProcess.addFile(newfile)
                        logging.debug("new file loaded and added by DBS")
                        filesetToProcess.setLastUpdate(int(time.time()))
                        filesetToProcess.commit()

                fileLoc = getFileLoc.execute(file=files['LogicalFileName'])
                if fileLoc:
                    for loc in seList:
                        if loc not in fileLoc:
                            newfile.setLocation(loc)
                else:
                    newfile.setLocation(seList)
                LOCK.release()

    filesetToProcess.load()
    LASTIME = filesetToProcess.lastUpdate

    # For a re-opened or empty fileset, try until the purge time
    if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:
        filesetToProcess.setLastUpdate(int(time.time()))
        filesetToProcess.commit()

    if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:
        filesetToProcess.markOpen(False)
        logging.debug("Purge Done...")

    filesetToProcess.commit()
    logging.debug("DBS feeder work done...")
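# Illustrative only: the block structure this feeder expects back from
# self.dbsReader.getFiles(), as implied by the accesses above.  The block
# name, SE names, and file attributes are hypothetical.
_EXAMPLE_DBS_BLOCKS = {
    "/Primary/Processed-v1/RECO#block-0001": {
        "StorageElements": ["se1.fnal.gov", "se1.cern.ch"],
        "Files": [
            {"LogicalFileName": "/store/data/example/file.root",
             "FileSize": 1024,
             "NumberOfEvents": 10,
             "LumiList": [{"RunNumber": 142000,
                           "LumiSectionNumber": 1}]},
        ],
    },
}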