Example #1
    def testSetLocationOrder(self):
        """
        _testSetLocationOrder_

        This tests that you can specify a location before creating the file,
        instead of having to do it afterwards.
        """
        myThread = threading.currentThread()

        testFileA = File(lfn="/this/is/a/lfn", size=1024, events=10)
        testFileA.setLocation("se1.cern.ch")
        testFileA.create()

        testFileB = File(lfn=testFileA["lfn"])
        testFileB.load()

        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        locationFac = daoFactory(classname="Files.GetLocation")
        location = locationFac.execute(testFileB['lfn']).pop()

        self.assertEqual(location, 'se1.cern.ch')

        return
Example #2
    def testSetLocation(self):
        """
        _testSetLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn="/this/is/a/lfn",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()

        testFileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
        testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                              immediateSave=False)

        testFileB = File(id=testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"
        return
Example #3
    def test05(self):
        """
        _test05_

        Test express merging of multiple lumis with holes

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #4
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

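        # "locations" may arrive as a set, a list, or a single SE name;
        # reduce it to one storage element for the WMBS file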
        if isinstance(file["locations"], set):
            seName = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more than one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            seName = file["locations"][0]
        else:
            seName = file["locations"]

        wmbsFile["locations"] = set()

        if seName is not None:
            wmbsFile.setLocation(se = seName, immediateSave = False)
        wmbsFile['jid'] = jobID
        self.wmbsFilesToBuild.append(wmbsFile)

        return wmbsFile
Example #5
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")


        return
Example #6
    def createJob(self, streamerList, jobEvents, jobSize):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - 45s/evt reco speed
        #   - checksum calculation at 5MB/s (twice)
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer on local disk (factor 1)
        #   - RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (factor 4)
        jobTime = 300 + jobSize/500000 + jobEvents*45 + (jobSize*4*3)/5000000
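        # e.g. jobSize = 1 GB, jobEvents = 100:
        #   300 + 2000 + 4500 + 2400 = 9200 seconds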
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = (jobSize*6)/1024)

        return
Example #7
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            pnn = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error(
                    "Have more then one location for a file in job %i" %
                    (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            pnn = file["locations"][0]
        else:
            pnn = file["locations"]

        wmbsFile["locations"] = set()

        if pnn is not None:
            wmbsFile.setLocation(pnn=pnn, immediateSave=False)
        wmbsFile['jid'] = jobID

        return wmbsFile
Example #8
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            seName = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error(
                    "Have more than one location for a file in job %i" %
                    (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            seName = file["locations"][0]
        else:
            seName = file["locations"]

        wmbsFile["locations"] = set()

        if seName is not None:
            wmbsFile.setLocation(se=seName, immediateSave=False)
        wmbsFile['jid'] = jobID

        return wmbsFile
Example #9
    def test03(self):
        """
        _test03_

        Test the input size threshold on multiple lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription2)

        mySplitArgs["maxInputSize"] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0, "ERROR: JobFactory should have returned no JobGroup")

        time.sleep(1)

        mySplitArgs["maxLatency"] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1, "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2, "ERROR: JobFactory didn't create two jobs")

        return
Example #10
    def createJob(self, fileList, jobSize):
        """
        _createJob_

        create an express merge job for
        the passed in list of files

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        largestFile = 0
        for fileInfo in fileList:
            largestFile = max(largestFile, fileInfo['filesize'])
            f = File(id=fileInfo['id'], lfn=fileInfo['lfn'])
            f.setLocation(fileInfo['location'], immediateSave=False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 5MB/s merge speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + (jobSize * 3) / 5000000
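        # e.g. jobSize = 1 GB: 300 + 600 = 900 seconds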
        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                             disk=(jobSize + largestFile) /
                                             1024)

        return
Example #11
    def testGetInfo(self):
        """
        _testGetInfo_

        Test the getInfo() method of the File class to make sure that it
        returns the correct information.
        """
        testFileParent = File(lfn = "/this/is/a/parent/lfn", size = 1024,
                              events = 20, checksums={'cksum': 1111})
        testFileParent.addRun(Run(1, *[45]))
        testFileParent.create()

        testFile = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums={'cksum': 222})
        testFile.addRun(Run(1, *[45]))
        testFile.addRun(Run(2, *[46, 47]))
        testFile.addRun(Run(2, *[47, 48]))
        testFile.create()
        testFile.setLocation(se = "se1.fnal.gov", immediateSave = False)
        testFile.setLocation(se = "se1.cern.ch", immediateSave = False)
        testFile.addParent("/this/is/a/parent/lfn")

        info = testFile.getInfo()

        assert info[0] == testFile["lfn"], \
               "ERROR: File returned wrong LFN"
        
        assert info[1] == testFile["id"], \
               "ERROR: File returned wrong ID"
        
        assert info[2] == testFile["size"], \
               "ERROR: File returned wrong size"
        
        assert info[3] == testFile["events"], \
               "ERROR: File returned wrong events"
        
        assert info[4] == testFile["checksums"], \
               "ERROR: File returned wrong cksum"
        
        assert len(info[5]) == 2, \
               "ERROR: File returned wrong runs"
        
        assert info[5] == [Run(1, *[45]), Run(2, *[46, 47, 48])], \
               "Error: Run hasn't been combined correctly"
               
        assert len(info[6]) == 2, \
               "ERROR: File returned wrong locations"

        for testLocation in info[6]:
            assert testLocation in ["se1.fnal.gov", "se1.cern.ch"], \
                   "ERROR: File returned wrong locations"

        assert len(info[7]) == 1, \
               "ERROR: File returned wrong parents"

        assert info[7][0] == testFileParent, \
               "ERROR: File returned wrong parents"

        testFile.delete()
        testFileParent.delete()
        return
Example #12
    def testSetLocation(self):
        """
        _testSetLocation_

        Create a file and add a couple locations.  Load the file from the
        database to make sure that the locations were set correctly.
        """
        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                        checksums = {'cksum':1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()

        testFileA.setLocation(["se1.fnal.gov", "se1.cern.ch"])
        testFileA.setLocation(["bunkse1.fnal.gov", "bunkse1.cern.ch"],
                              immediateSave = False)

        testFileB = File(id = testFileA["id"])
        testFileB.loadData()

        goldenLocations = ["se1.fnal.gov", "se1.cern.ch"]

        for location in testFileB["locations"]:
            assert location in goldenLocations, \
                   "ERROR: Unknown file location"
            goldenLocations.remove(location)

        assert len(goldenLocations) == 0, \
              "ERROR: Some locations are missing"    
        return
Example #13
    def test05(self):
        """
        _test05_
        Test express merging of multiple lumis with holes
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #14
    def createJob(self, fileList, jobSize):
        """
        _createJob_

        create an express merge job for
        the passed in list of files

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        largestFile = 0
        for fileInfo in fileList:
            largestFile = max(largestFile, fileInfo['filesize'])
            f = File(id = fileInfo['id'],
                     lfn = fileInfo['lfn'])
            f.setLocation(fileInfo['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 5MB/s merge speed
        #   - checksum calculation at 5MB/s (twice)
        #   - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + (jobSize*4)/5000000
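        # e.g. jobSize = 1 GB: 300 + 800 = 1100 seconds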
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = (jobSize+largestFile)/1024)

        return
Example #15
    def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization (twice)
        #   - 0.5MB/s repack speed
        #   - reco with timePerEvent
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
        jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
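        # e.g. jobSize = 1 GB, jobEvents = 100, timePerEvent = 45,
        # sizePerEvent = 1 MB: 600 + 2000 + 4500 + 40 = 7140 seconds,
        # well under the 47 hour cap applied below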
        self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                             disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                             memory = memoryRequirement)

        return
Example #16
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement, numberOfCores = 1):
        """
        _createJob_

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - RAW on local disk (factor 1)
        jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
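        # e.g. jobSize = 1 GB: 300 + 2000 + 400 = 2700 seconds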
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = jobSize/1024, memory = memoryRequirement)

        return
Example #17
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id=streamer['id'], lfn=streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave=False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - 45s/evt reco speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (factor 4)
        jobTime = 300 + jobSize / 500000 + jobEvents * 45 + (jobSize * 4 *
                                                             2) / 5000000
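        # e.g. jobSize = 1 GB, jobEvents = 100:
        #   300 + 2000 + 4500 + 1600 = 8400 seconds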
        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                             disk=(jobSize * 5) / 1024,
                                             memory=memoryRequirement)

        return
Example #18
    def createFileFromDataStructsFile(self, file, jobID):
        """
        _createFileFromDataStructsFile_

        This function will create a WMBS File given a DataStructs file
        """
        wmbsFile = File()
        wmbsFile.update(file)

        if isinstance(file["locations"], set):
            pnn = list(file["locations"])[0]
        elif isinstance(file["locations"], list):
            if len(file['locations']) > 1:
                logging.error("Have more then one location for a file in job %i" % (jobID))
                logging.error("Choosing location %s" % (file['locations'][0]))
            pnn = file["locations"][0]
        else:
            pnn = file["locations"]

        wmbsFile["locations"] = set()

        if pnn is not None:
            wmbsFile.setLocation(pnn = pnn, immediateSave = False)
        wmbsFile['jid'] = jobID
        
        return wmbsFile
Example #19
    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return
Example #20
    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size = 1000, events = nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
Example #21
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk",
                                     "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC",
                                     "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN",
                                     "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum,
                           size=1000,
                           events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Example #22
    def loadFiles(self, size=10):
        """
        _loadFiles_

        Grab some files from the resultProxy
        Should handle multiple proxies.  Not really sure about that
        """

        if len(self.proxies) < 1:
            # Well, you don't have any proxies.
            # This is what happens when you ran out of files last time
            logging.info("No additional files found; Ending.")
            return set()

        resultProxy = self.proxies[0]
        rawResults = []
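        # resultProxy.keys may be a plain list attribute or a callable that
        # returns the column keys, depending on the DB interface version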
        if isinstance(resultProxy.keys, list):
            keys = resultProxy.keys
        else:
            keys = resultProxy.keys()
            if isinstance(keys, set):
                # If it's a set, handle it
                keys = list(keys)
        files = set()

        while len(rawResults) < size and len(self.proxies) > 0:
            length = size - len(rawResults)
            newResults = resultProxy.fetchmany(size=length)
            if len(newResults) < length:
                # Assume we're all out
                # Eliminate this proxy
                self.proxies.remove(resultProxy)
            rawResults.extend(newResults)

        if rawResults == []:
            # Nothing to do
            return set()

        fileList = self.formatDict(results=rawResults, keys=keys)
        fileIDs = list(set([x["fileid"] for x in fileList]))

        myThread = threading.currentThread()
        fileInfoAct = self.daoFactory(classname="Files.GetForJobSplittingByID")
        fileInfoDict = fileInfoAct.execute(file=fileIDs, conn=myThread.transaction.conn, transaction=True)

        getLocAction = self.daoFactory(classname="Files.GetLocationBulk")
        getLocDict = getLocAction.execute(files=fileIDs, conn=myThread.transaction.conn, transaction=True)

        for fID in fileIDs:
            fl = WMBSFile(id=fID)
            fl.update(fileInfoDict[fID])
            locations = getLocDict.get((fID), [])
            for loc in locations:
                fl.setLocation(loc, immediateSave=False)
            files.add(fl)

        return files
Example #23
    def test06(self):
        """
        _test06_

        Test the max input files threshold for multiple lumis

        3 lumis of the same size

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return
Example #25
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi, followed by over-large lumi
        expect 1 job for the small lumi and 2 jobs for the over-large one

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return
Example #26
    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site = None, bl = [], wl = []):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons

        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            #site = self.sites[0]
            testFile = File(lfn = "/singleLfn/%s/%s" %(name, n),
                            size = 1024, events = 10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)


        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name = '%s-%i' %(name, index))
            testJob.addFile(f)
            testJob["location"]  = f.getLocations()[0]
            testJob['custom']['location'] = f.getLocations()[0]
            testJob['task']    = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec']    = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner']   = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            output = open(os.path.join(jobCache, 'job.pkl'), 'wb')
            pickle.dump(testJob, output)
            output.close()

        return testJob, testFile
Example #28
    def test10(self):
        """
        _test10_
        Test merging of multiple lumis with holes in the lumi sequence
        Hole is due to no streamer files for the lumi
        Multi lumi input
        
        This only works with a single hole, since a merged file is created
        even when it is smaller than minInputSize.

        The test was changed because maxInputEvents is no longer used.
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        mySplitArgs['minInputSize'] = 100000
        mySplitArgs['maxInputSize'] = 200000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={
            'RUN': 1,
            'LUMI': 3,
            'STREAM': "A",
            'FILECOUNT': 0,
            'INSERT_TIME': self.currentTime,
            'CLOSE_TIME': self.currentTime
        },
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #29
    def createJob(self, fileList, jobSize, errorDataset=False):
        """
        _createJob_

        create a repack merge job for
        the passed in list of files

        """
        # find largest file
        largestFile = 0
        for fileInfo in fileList:
            largestFile = max(largestFile, fileInfo['filesize'])

        # calculate number of cores based on disk usage
        numberOfCores = 1 + (int)(
            (jobSize + largestFile) / (20 * 1000 * 1000 * 1000))
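        # i.e. one extra core for every 20 GB of expected local disk usage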

        # jobs requesting more than 8 cores would never run
        if numberOfCores > 8:
            self.markFailed(fileList)
            return

        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for fileInfo in fileList:
            f = File(id=fileInfo['id'], lfn=fileInfo['lfn'])
            f.setLocation(fileInfo['location'], immediateSave=False)
            self.currentJob.addFile(f)

        if errorDataset:
            self.currentJob.addBaggageParameter("useErrorDataset", True)

        # allow large (single lumi) repackmerge to use multiple cores
        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        # job time based on
        #  - 5 min initialization
        #  - 5MB/s merge speed
        #  - checksum calculation at 5MB/s
        #  - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + (jobSize * 3) / 5000000
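        # e.g. jobSize = 1 GB: 300 + 600 = 900 seconds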
        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                             disk=(jobSize + largestFile) /
                                             1024,
                                             memory=1000)

        return
Example #30
    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site=None, bl=[], wl=[]):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons

        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, "CacheDir")

        for n in range(nJobs):
            # First make a file
            # site = self.sites[0]
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n), size=1024, events=10)
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation("se.%s" % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name="%s-%i" % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob["custom"]["location"] = f.getLocations()[0]
            testJob["task"] = task.getPathName()
            testJob["sandbox"] = task.data.input.sandbox
            testJob["spec"] = os.path.join(self.testDir, "basicWorkload.pcl")
            testJob["mask"]["FirstEvent"] = 101
            testJob["owner"] = "tapas"
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob["ownerDN"] = "tapas"
            testJob["ownerRole"] = "cmsrole"
            testJob["ownerGroup"] = "phgroup"

            jobCache = os.path.join(cacheDir, "Sub_%i" % (sub), "Job_%i" % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob["cache_dir"] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            output = open(os.path.join(jobCache, "job.pkl"), "wb")
            pickle.dump(testJob, output)
            output.close()

        return testJob, testFile
Example #31
    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return
Example #33
    def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site, bl = [], wl = []):
        """
        _makeNJobs_

        Make and return a WMBS Job and File
        This handles all those damn add-ons

        """
        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            #site = self.sites[0]
            testFile = File(lfn = "/singleLfn/%s/%s" % (name, n),
                            size = 1024, events = 10)
            if isinstance(site, list):
                for singleSite in site:
                    testFile.setLocation(singleSite)
            else:
                testFile.setLocation(site)
            testFile.create()
            fileset.addFile(testFile)


        fileset.commit()

        index = 0
        for f in fileset.files:
            index += 1
            testJob = Job(name = '%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['priority'] = 101
            testJob['multicoreEnabled'] = False
            testJob['numberOfCores'] = 1
            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            output = open(os.path.join(jobCache, 'job.pkl'), 'wb')
            pickle.dump(testJob, output)
            output.close()

        return testJob, testFile
Example #34
    def test10(self):
        """
        _test10_
        Test merging of multiple lumis with holes in the lumi sequence
        Hole is due to no streamer files for the lumi
        Multi lumi input
        
        This only works with a single hole, since a merged file is created
        even when it is smaller than minInputSize.

        The test was changed because maxInputEvents is no longer used.
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 100000
        mySplitArgs['maxInputSize'] = 200000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #35
    def filesOfStatus(self,
                      status,
                      limit=0,
                      loadChecksums=True,
                      doingJobSplitting=False):
        """
        _filesOfStatus_

        Return a Set of File objects that have the given status with respect
        to this subscription.
        """
        existingTransaction = self.beginTransaction()

        status = status.title()
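        # e.g. "acquired" -> "Acquired", so the DAO classname built below
        # becomes Subscriptions.GetAcquiredFiles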
        files = set()
        if limit > 0:
            action = self.daofactory(
                classname="Subscriptions.Get%sFilesByLimit" % status)
            fileList = action.execute(self["id"],
                                      limit,
                                      conn=self.getDBConn(),
                                      transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="Subscriptions.Get%sFiles" %
                                     status)
            fileList = action.execute(self["id"],
                                      conn=self.getDBConn(),
                                      transaction=self.existingTransaction())

        if doingJobSplitting:
            fileInfoAct = self.daofactory(
                classname="Files.GetForJobSplittingByID")
        else:
            fileInfoAct = self.daofactory(classname="Files.GetByID")

        fileInfoDict = fileInfoAct.execute(
            file=[x["file"] for x in fileList],
            conn=self.getDBConn(),
            transaction=self.existingTransaction())

        #Run through all files
        for f in fileList:
            fl = File(id=f['file'])
            if loadChecksums:
                fl.loadChecksum()
            fl.update(fileInfoDict[f['file']])
            if 'locations' in f.keys():
                fl.setLocation(f['locations'], immediateSave=False)
            files.add(fl)

        self.commitTransaction(existingTransaction)
        return files
Example #36
    def createFile(self, lfn, events, run, lumis, location):
        """
        _createFile_

        Create a file for testing
        """
        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
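        # offset lumi numbers by run * lumis so each run gets its own block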
        for lumi in range(lumis):
            lumiList.append((run * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
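
For illustration, the lumi numbering above can be checked standalone:
with run=2 and lumis=3 the helper attaches lumis 6, 7 and 8.

# Pure-Python check of the (run * lumis) + lumi numbering used above.
run, lumis = 2, 3
lumiList = [(run * lumis) + lumi for lumi in range(lumis)]
print(lumiList)  # -> [6, 7, 8]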
Example #38
0
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type="Processing",
                                        split_algo="FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name='%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
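
A hedged usage sketch (assumes an initialized WMBS schema and a writable
cache directory; the values are illustrative, not from the source):

# Create five identical test jobs sharing the single input file.
jobGroup = self.createTestJobs(nJobs=5, cacheDir="/tmp/jobCache")
assert len(jobGroup.jobs) == 5
for job in jobGroup.jobs:
    # setCache() above should have stored cacheDir on each job
    print(job["name"], job["cache_dir"])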
Example #39
0
def createCommonFileset():
    """
    Create a simple fileset with 2 files at the same location
    """
    multipleFilesFileset = Fileset(name="TestFileset")

    newFile = File("/some/file/test1", size=1000, events=100)
    newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
    newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test2", size=2000, events=200)
    newFile.addRun(Run(3, *[2, 8]))
    newFile.addRun(Run(4, *[3, 8]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test3", size=3000, events=300)
    newFile.addRun(Run(5, *[10, 11, 12]))
    newFile.addRun(Run(6, *[10, 11, 12]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    newFile = File("/some/file/test4", size=4000, events=400)
    newFile.addRun(Run(2, *[3, 8, 9]))
    newFile.addRun(Run(3, *[3, 4, 5, 6]))
    newFile.setLocation('T2_CH_CERN')
    multipleFilesFileset.addFile(newFile)

    multipleFilesFileset.create()
    return multipleFilesFileset
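
A quick consumption sketch, assuming the in-memory files are reachable
through the fileset's files attribute as in WMCore filesets:

fileset = createCommonFileset()
for wmbsFile in fileset.files:
    print(wmbsFile["lfn"], wmbsFile["size"], wmbsFile["events"])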
Example #40
0
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """


        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = "Processing",
                                        split_algo = "FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create a job
        for i in range(nJobs):
            testJob = Job(name = '%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Inject the workflow in WMBS and add the subscriptions
        """

        testWorkflow = Workflow(spec = os.path.join(getTestBase(),
                                                    "WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl"),
                                owner = "/CN=OU/DN=SomeoneWithPermissions",
                                name = "BogusRequest", task = "BogusTask", owner_vogroup = "", owner_vorole = "")
        testWorkflow.create()

        testMergeWorkflow = Workflow(spec = os.path.join(getTestBase(),
                                                    "WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl"),
                                     owner = "/CN=OU/DN=SomeoneWithPermissions",
                                     name = "BogusRequest", task = "BogusTask/Merge", owner_vogroup = "", owner_vorole = "")
        testMergeWorkflow.create()

        testWMBSFileset = Fileset(name = "TopFileset")
        testWMBSFileset.create()
        testWMBSFilesetUnmerged = Fileset(name = "UnmergedFileset")
        testWMBSFilesetUnmerged.create()

        testFileA = File(lfn = "/this/is/a/lfnA" , size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFilesetUnmerged.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFilesetUnmerged.commit()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow)
        testSubscription.create()

        testSubscriptionMerge = Subscription(fileset = testWMBSFilesetUnmerged,
                                             workflow = testMergeWorkflow,
                                             type = "Merge")
        testSubscriptionMerge.create()

        return (testSubscription, testSubscriptionMerge)
Example #43
0
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
        """
        _createJob_

        """
        # find largest file
        largestFile = 0
        for streamer in streamerList:
            largestFile = max(largestFile, streamer['filesize'])

        # calculate number of cores based on disk usage
        numberOfCores = 1 + int(
            (jobSize + largestFile) / (20 * 1000 * 1000 * 1000))

        # jobs requesting more than 8 cores would never run
        if numberOfCores > 8:
            self.markFailed(streamerList)
            return

        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id=streamer['id'], lfn=streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave=False)
            self.currentJob.addFile(f)

        # allow large (single lumi) repack to use multiple cores
        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        # job time based on
        #  - 5 min initialization
        #  - 1.5MB/s repack speed
        #  - checksum calculation at 5MB/s
        #  - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + jobSize / 1500000 + (jobSize * 2) / 5000000
        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                             disk=(jobSize + largestFile) /
                                             1024,
                                             memory=memoryRequirement)

        return
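
As a standalone sanity check of the sizing formulas above, here is the
arithmetic for a hypothetical 30 GB job whose largest input file is
15 GB (sample numbers only):

# One extra core per 20 GB of local disk needed (input + largest file).
jobSize = 30 * 1000 * 1000 * 1000      # 30 GB of input
largestFile = 15 * 1000 * 1000 * 1000  # biggest single input file
numberOfCores = 1 + int((jobSize + largestFile) / (20 * 1000 * 1000 * 1000))
print(numberOfCores)  # -> 3

# 5 min startup, repack at 1.5 MB/s, checksum plus stageout at 5 MB/s.
jobTime = 300 + jobSize / 1500000 + (jobSize * 2) / 5000000
print(jobTime / 3600.0)  # -> roughly 9 hours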
Example #44
0
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package = "WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package = "WMCore.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE")
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE")
        myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2")

        self.fileset1 = Fileset(name = "TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Harvest",
                                           type = "Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Example #45
0
    def createFileCollection(self,
                             name,
                             nSubs,
                             nFiles,
                             workflowURL='test',
                             site=None):
        """
        _createFileCollection_

        Create a collection of files for splitting into jobs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=workflowURL,
                                owner="mnorman",
                                name=name,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        for sub in range(nSubs):

            nameStr = '%s-%i' % (name, sub)

            testFileset = Fileset(name=nameStr)
            testFileset.create()

            for f in range(nFiles):
                # pick a random site
                if not site:
                    tmpSite = 'se.%s' % (random.choice(self.sites))
                else:
                    tmpSite = 'se.%s' % (site)
                testFile = File(lfn="/lfn/%s/%i" % (nameStr, f),
                                size=1024,
                                events=10)
                testFile.setLocation(tmpSite)
                testFile.create()
                testFileset.addFile(testFile)

            testFileset.commit()
            testFileset.markOpen(isOpen=0)
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

        return
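
A hedged example call (names illustrative): two subscriptions of ten
files each, pinned to a single site rather than a random pick from
self.sites:

self.createFileCollection(name="TestWorkload", nSubs=2, nFiles=10,
                          site="T1_US_FNAL")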
Example #46
0
    def createJob(self, fileList):
        """
        _createJob_

        Create an alcaharvest job

        """
        self.newGroup()

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for fileInfo in fileList:
            f = File(id=fileInfo["id"], lfn=fileInfo["lfn"])
            f.setLocation(fileInfo["location"], immediateSave=False)
            self.currentJob.addFile(f)
Example #47
0
    def createJob(self, fileList):
        """
        _createJob_

        Create an alcaharvest job

        """
        self.newGroup()

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for fileInfo in fileList:
            f = File(id=fileInfo['id'], lfn=fileInfo['lfn'])
            f.setLocation(fileInfo['location'], immediateSave=False)
            self.currentJob.addFile(f)
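
The fileList argument is expected to carry one dict per file with at
least 'id', 'lfn' and 'location' keys; a minimal illustrative payload
(values invented):

fileList = [
    {"id": 1, "lfn": "/store/express/alca1.root", "location": "T2_CH_CERN"},
    {"id": 2, "lfn": "/store/express/alca2.root", "location": "T2_CH_CERN"},
]
# self.createJob(fileList) then builds one job containing both files.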
Example #48
0
File: Repack.py Project: dmwm/T0
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
        """
        _createJob_

        """
        # find largest file
        largestFile = 0
        for streamer in streamerList:
            largestFile = max(largestFile, streamer['filesize'])

        # calculate number of cores based on disk usage
        numberOfCores = 1 + int((jobSize+largestFile)/(20*1000*1000*1000))

        # jobs requesting more than 8 cores would never run
        if numberOfCores > 8:
            self.markFailed(streamerList)
            return

        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # allow large (single lumi) repack to use multiple cores
        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        # job time based on
        #  - 5 min initialization
        #  - 1.5MB/s repack speed
        #  - checksum calculation at 5MB/s
        #  - stageout at 5MB/s
        # job disk based on
        #  - input for largest file on local disk
        #  - output on local disk (factor 1)
        jobTime = 300 + jobSize/1500000 + (jobSize*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = jobTime,
                                             disk = (jobSize+largestFile)/1024,
                                             memory = memoryRequirement)

        return
Example #49
0
    def createJob(self, streamerList):
        """
        _createJob_

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)
Example #50
0
    def createFile(lfn, events, run, lumis, location, lumiMultiplier=None):
        """
        _createFile_

        Create a file for testing
        """
        if lumiMultiplier is None:
            lumiMultiplier = run

        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((lumiMultiplier * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile
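
The explicit lumiMultiplier decouples the lumi numbering from the run
number; a standalone check with sample values:

# Pure-Python check of the (lumiMultiplier * lumis) + lumi numbering.
run, lumis, lumiMultiplier = 1, 4, 10  # run is ignored given a multiplier
lumiList = [(lumiMultiplier * lumis) + lumi for lumi in range(lumis)]
print(lumiList)  # -> [40, 41, 42, 43]
# With lumiMultiplier=None the run number is used, as in Example #36.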
Example #52
0
    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #53
0
    def testLoadData(self):
        """
        _testLoadData_

        Test the loading of all data from a file, including run/lumi
        associations, location information and parentage information.
        """
        testFileParentA = File(lfn="/this/is/a/parent/lfnA",
                               size=1024,
                               events=20,
                               checksums={'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn="/this/is/a/parent/lfnB",
                               size=1024,
                               events=20,
                               checksums={'cksum': 1})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn="/this/is/a/lfn",
                         size=1024,
                         events=10,
                         checksums={'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileA.setLocation(se="se1.fnal.gov", immediateSave=False)
        testFileA.setLocation(se="se1.cern.ch", immediateSave=False)
        testFileA.addParent("/this/is/a/parent/lfnA")
        testFileA.addParent("/this/is/a/parent/lfnB")
        testFileA.updateLocations()

        testFileB = File(lfn=testFileA["lfn"])
        testFileB.loadData(parentage=1)
        testFileC = File(id=testFileA["id"])
        testFileC.loadData(parentage=1)

        assert testFileA == testFileB, \
               "ERROR: File load by LFN didn't work"

        assert testFileA == testFileC, \
               "ERROR: File load by ID didn't work"

        testFileA.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        return
Example #55
0
    def createTestJobGroup(self):
        """
        Creates a group of several jobs

        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')
        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJobGroup.add(testJob)

        testJobGroup.commit()

        return testJobGroup