Ejemplos de Fileset.getFiles en Python

Lenguaje de programación: Python

Namespace/Package Name: WMCore.WMBS.Fileset

Clase / Tipo: Fileset

Método / Función: getFiles

Ejemplos en hotexamples.com: 5

Python Fileset.getFiles - 5 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de WMCore.WMBS.Fileset.Fileset.getFiles extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

Fileset(30)

addFile(30)

commit(30)

create(30)

loadData(25)

markOpen(21)

load(12)

exists(6)

delete(3)

getFiles(2)

addFilesToWMBSInBulk(1)

setLastUpdate(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: Harvest_t.py Proyecto: AndrewLevin/WMCore

class HarvestTest(unittest.TestCase):
    """
    _HarvestTest_

    Test for EndOfRun job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package = "WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package = "WMCore.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE")
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE")
        myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2")

        self.fileset1 = Fileset(name = "TestFileset1")
        for file in range(11):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Harvest",
                                           type = "Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def getConfig(self):
        """
        _getConfig_

        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket     = os.getenv("DBSOCK")

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl        = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName     = 'wmagent_jobdump'

        return config

    def finishJobs(self, jobGroups, subscription = None):
        """
        _finishJobs_

        """
        if not subscription:
            subscription = self.subscription1
        for f in subscription.acquiredFiles():
            subscription.completeFiles(f)

        for jobGroup in jobGroups:
            self.changer.propagate(jobGroup.jobs, 'executing', 'created')
            self.changer.propagate(jobGroup.jobs, 'complete', 'executing')
            self.changer.propagate(jobGroup.jobs, 'success', 'complete')
            self.changer.propagate(jobGroup.jobs, 'cleanout', 'success')

        return

    def testHarvestEndOfRunTrigger(self):
        """
        _testDQMHarvestEndOfRunTrigger_

        Make sure that the basic splitting algo works, which is only, ExpressMerge is ALL done, fire a job against that fileset

        """
        self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't")

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)

        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 0 , "We got 1 or more jobGroups with an open fileset and no periodic configuration")

        self.fileset1.markOpen(False)
        self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?")

        # We should also check if there are aqcuired files, if there are, there are jobs,
        # we don't want to fire another jobs while previous are running (output is integrating whatever  input)
        # TODO : The above one we can do when all is done. Not priority

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 1 , "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed")

        return

    def testPeriodicTrigger(self):
        """
        _testPeriodicTrigger_

        """
        self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here")
        # Test timeout (5s for this first test)
        # there should be no acquired files, if there are, shouldn't be a job
        #self.subscription1.acquireFiles(self.subscription1.availableFiles().pop())

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 3)

        self.assertEqual(len(jobGroups), 1 , "Didn't created the first periodic job when there were acquired files")

        # For the whole thing to work, faking the first job finishing, and putting the files as complete
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        for file in range(12,24):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # Testing that it doesn't create a job unless the delay is past
        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        self.assertEqual(len(jobGroups), 0 , "Created one or more job, when there were non-acquired file and the period is not passed by")

        time.sleep(2)

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        self.assertEqual(len(jobGroups), 1 , "Didn't created one or more job, and there weren't and the period is passed by")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        for file in range(26,36):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # Trying to create another job just afterwards, it shouldn't, because it should respect the configured delay
        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        self.assertEqual(len(jobGroups), 0 , "Created one or more job, there are new files, but the delay is not past")

        time.sleep(2)

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        self.assertEqual(len(jobGroups), 1 , "Didn't created one or more job, there are new files and the delay is past")

        # Last check is whether the job gets all the files or not

        numFilesJob = jobGroups[0].jobs[0].getFiles()
        numFilesFileset = self.fileset1.getFiles()
        self.assertEqual(numFilesJob, numFilesFileset, "Job didn't got all the files")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding files for the first location
        for file in range(38,48):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()
        # Then another location
        for file in range(50,56):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(1,*[1]))
            newFile.setLocation('SomeSE3')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # We should have jobs in both locations
        time.sleep(2)

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        self.assertEqual(len(jobGroups[0].getJobs()), 2 , "We didn't get 2 jobs for 2 locations")

        firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0]
        secondJobLocation = jobGroups[0].getJobs()[1].getFileLocations()[0]

        self.assertEqual(firstJobLocation, 'SomeSite', "First job location is not SomeSite")
        self.assertEqual(secondJobLocation, 'SomeSite2', "Second job location is not SomeSite2")

        self.finishJobs(jobGroups)

        for file in range(60,65):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(2,*[2]))
            newFile.setLocation('SomeSE3')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        for file in range(70,75):
            newFile = File("/some/file/name%d" % file, size = 1000, events = 100)
            newFile.addRun(Run(3,*[3]))
            newFile.setLocation('SomeSE3')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        time.sleep(2)

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval = 2)

        # This is one of the most "complicated" tests so worth to comment, 4 jobs should be created
        # 1 - all previous files from SomeSE and run = 1 (a lot, like ~45)
        # 2 - Few files from SomeSE3, Run = 1
        # 3 - Few files from SomeSE3, Run = 2
        # 4 - Few files from SomeSE3, Run = 3
        self.assertEqual(len(jobGroups[0].getJobs()), 4 , "We didn't get 4 jobs for adding 2 different runs to SomeSE3")

        return

    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test run are splitted between sites,
        in real life that MUST NOT happen we still don't support that.
        """
        multipleFilesFileset = Fileset(name = "TestFileset")

        newFile = File("/some/file/test1", size = 1000, events = 100)
        newFile.addRun(Run(1,*[1,3,4,5,6,7]))
        newFile.addRun(Run(2,*[1,2,4,5,6,7]))
        newFile.setLocation('SomeSE')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size = 1000, events = 100)
        newFile.addRun(Run(1,*[2,8]))
        newFile.addRun(Run(2,*[3,8]))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec = "spec.xml",
                                      owner = "hufnagel",
                                      name = "TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub  = Subscription(fileset = multipleFilesFileset,
                                   workflow = harvestingWorkflow,
                                   split_algo = "Harvest",
                                   type = "Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval = 2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                             "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8]],
                                 2 : [[1,2],[4,7],[3,3],[8,8]]}
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")

        self.finishJobs(jobGroups, harvestSub)

        newFile = File("/some/file/test3", size = 1000, events = 100)
        newFile.addRun(Run(1,*range(9,15)))
        newFile.setLocation('SomeSE3')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval = 2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]],
                                 2 : [[1,2],[4,7],[3,3],[8,8]]}
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")

        harvestingWorkflowSib = Workflow(spec = "spec.xml",
                                         owner = "hufnagel",
                                         name = "TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib  = Subscription(fileset = multipleFilesFileset,
                                      workflow = harvestingWorkflowSib,
                                      split_algo = "Harvest",
                                      type = "Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package = "WMCore.WMBS", subscription = harvestSubSib)

        multipleFilesFileset.markOpen(False)

        jobGroups = jobFactorySib(periodic_harvest_sibling = True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")
                
        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling = True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            possibleLumiPairs = {1 : [[1,1],[3,7],[2,2],[8,8],[9,14]],
                                 2 : [[1,2],[4,7],[3,3],[8,8]]}
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                self.assertTrue(lumiPair in possibleLumiPairs[run], "Strange lumi pair in the job mask")

Ejemplo n.º 2

Mostrar archivo

Archivo: RepackMerge_t.py Proyecto: johnhcasallasl/T0

class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_

    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'TIME' : int(time.time()),
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                     transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")                                                      
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active
                                    """,
                                 transaction = False)

        return

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi, followed by over-large lumi
        expect 1 job for small lumi and 4 jobs for over-large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return

    def test01(self):
        """
        _test01_

        Test max size threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test02(self):
        """
        _test02_

        Test max event threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100 * lumi)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test04(self):
        """
        _test04_

        Test max size threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test05(self):
        """
        _test05_

        Test max event threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test08(self):
        """
        _test08_

        Test under merge (over merge size threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

Ejemplo n.º 3

Mostrar archivo

class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_

    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                     transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")                                                      
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active
                                    """,
                                 transaction = False)

        return

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi, followed by over-large lumi
        expect 1 job for small lumi and 4 jobs for over-large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return

    def test01(self):
        """
        _test01_

        Test max size threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test02(self):
        """
        _test02_

        Test max event threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100 * lumi)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test04(self):
        """
        _test04_

        Test max size threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test05(self):
        """
        _test05_

        Test max event threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test08(self):
        """
        _test08_

        Test under merge (over merge size threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

Ejemplo n.º 4

Mostrar archivo

Archivo: Harvest_t.py Proyecto: vytjan/WMCore

class HarvestTest(unittest.TestCase):
    """
    _HarvestTest_

    Test for EndOfRun job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T1_US_FNAL", 10, 20, "T3_US_FNALLPC", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 10, 20, "T2_CH_CERN", "T2_CH_CERN")

        self.fileset1 = Fileset(name="TestFileset1")
        for fileNum in range(11):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()
        EmulatorSetup.deleteConfig(self.configFile)

        return

    def getConfig(self):
        """
        _getConfig_

        """
        config = self.testInit.getConfiguration()
        self.testInit.generateWorkDir(config)

        config.section_("CoreDatabase")
        config.CoreDatabase.connectUrl = os.getenv("DATABASE")
        config.CoreDatabase.socket = os.getenv("DBSOCK")

        # JobStateMachine
        config.component_('JobStateMachine')
        config.JobStateMachine.couchurl = os.getenv('COUCHURL', None)
        config.JobStateMachine.couchDBName = 'wmagent_jobdump'

        return config

    def finishJobs(self, jobGroups, subscription=None):
        """
        _finishJobs_

        """
        if not subscription:
            subscription = self.subscription1
        for f in subscription.acquiredFiles():
            subscription.completeFiles(f)

        for jobGroup in jobGroups:
            self.changer.propagate(jobGroup.jobs, 'executing', 'created')
            self.changer.propagate(jobGroup.jobs, 'complete', 'executing')
            self.changer.propagate(jobGroup.jobs, 'success', 'complete')
            self.changer.propagate(jobGroup.jobs, 'cleanout', 'success')

        return

    def testHarvestEndOfRunTrigger(self):
        """
        _testDQMHarvestEndOfRunTrigger_

        Make sure that the basic splitting algo works, which is only, ExpressMerge is ALL done, fire a job against that fileset

        """
        self.assertEqual(self.fileset1.open, True, "Fileset is closed. Shouldn't")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)

        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 0,
                         "We got 1 or more jobGroups with an open fileset and no periodic configuration")

        self.fileset1.markOpen(False)
        self.assertEqual(self.fileset1.open, False, "Fileset is opened, why?")

        # We should also check if there are aqcuired files, if there are, there are jobs,
        # we don't want to fire another jobs while previous are running (output is integrating whatever  input)
        # TODO : The above one we can do when all is done. Not priority

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory()

        self.assertEqual(len(jobGroups), 1,
                         "Harvest jobsplitter didn't create a single jobGroup after the fileset was closed")

        return

    def testPeriodicTrigger(self):
        """
        _testPeriodicTrigger_

        """
        self.assertEqual(self.fileset1.open, True, "Fileset is not open, not testing periodic here")
        # Test timeout (5s for this first test)
        # there should be no acquired files, if there are, shouldn't be a job
        # self.subscription1.acquireFiles(self.subscription1.availableFiles().pop())

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=3)

        self.assertEqual(len(jobGroups), 1, "Didn't created the first periodic job when there were acquired files")

        # For the whole thing to work, faking the first job finishing, and putting the files as complete
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        for fileNum in range(12, 24):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # Testing that it doesn't create a job unless the delay is past
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0,
                         "Created one or more job, when there were non-acquired file and the period is not passed by")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1,
                         "Didn't created one or more job, and there weren't and the period is passed by")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding more of files, so we have new stuff to process
        for fileNum in range(26, 36):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # Trying to create another job just afterwards, it shouldn't, because it should respect the configured delay
        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 0, "Created one or more job, there are new files, but the delay is not past")

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups), 1, "Didn't created one or more job, there are new files and the delay is past")

        # Last check is whether the job gets all the files or not

        numFilesJob = jobGroups[0].jobs[0].getFiles()
        numFilesFileset = self.fileset1.getFiles()
        self.assertEqual(numFilesJob, numFilesFileset, "Job didn't got all the files")

        # Finishing out previous jobs
        self.finishJobs(jobGroups)

        # Adding files for the first location
        for fileNum in range(38, 48):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T1_US_FNAL_Disk')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()
        # Then another location
        for fileNum in range(50, 56):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('T2_CH_CERN')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        # We should have jobs in both locations
        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        self.assertEqual(len(jobGroups[0].getJobs()), 2, "We didn't get 2 jobs for 2 locations")

        firstJobLocation = jobGroups[0].getJobs()[0].getFileLocations()[0]
        secondJobLocation = jobGroups[0].getJobs()[1].getFileLocations()[0]

        self.assertEqual(firstJobLocation, 'T2_CH_CERN')
        self.assertEqual(secondJobLocation, 'T1_US_FNAL')

        self.finishJobs(jobGroups)

        for fileNum in range(60, 65):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(2, *[2]))
            newFile.setLocation('T2_CH_CERN')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        for fileNum in range(70, 75):
            newFile = File("/some/file/name%d" % fileNum, size=1000, events=100)
            newFile.addRun(Run(3, *[3]))
            newFile.setLocation('T2_CH_CERN')
            self.fileset1.addFile(newFile)
        self.fileset1.commit()

        time.sleep(2)

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=self.subscription1)
        jobGroups = jobFactory(periodic_harvest_interval=2)

        # This is one of the most "complicated" tests so worth to comment, 4 jobs should be created
        # 1 - all previous files from SomeSE and run = 1 (a lot, like ~45)
        # 2 - Few files from SomeSE3, Run = 1
        # 3 - Few files from SomeSE3, Run = 2
        # 4 - Few files from SomeSE3, Run = 3
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "We didn't get 4 jobs for adding 2 different runs to SomeSE3")

        return

    def testMultipleRunHarvesting(self):
        """
        _testMultipleRunHarvesting_

        Add some files with multiple runs in each, make sure the jobs
        are created by location and run. Verify each job mask afterwards.
        Note that in this test run are splitted between sites,
        in real life that MUST NOT happen we still don't support that.
        """
        multipleFilesFileset = Fileset(name="TestFileset")

        newFile = File("/some/file/test1", size=1000, events=100)
        newFile.addRun(Run(1, *[1, 3, 4, 5, 6, 7]))
        newFile.addRun(Run(2, *[1, 2, 4, 5, 6, 7]))
        newFile.setLocation('T1_US_FNAL_Disk')
        multipleFilesFileset.addFile(newFile)
        newFile = File("/some/file/test2", size=1000, events=100)
        newFile.addRun(Run(1, *[2, 8]))
        newFile.addRun(Run(2, *[3, 8]))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.create()

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="hufnagel",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4,
                         "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            ll = LumiList(compactList={1: [[1, 1], [3, 7], [2, 2], [8, 8]],
                                       2: [[1, 2], [4, 7], [3, 3], [8, 8]]})
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                for lumi in range(lumiPair[0], lumiPair[1] + 1):
                    self.assertTrue((str(run), lumi) in ll, "All of %s not in %s" % (lumiPair, ll))

        self.finishJobs(jobGroups, harvestSub)

        newFile = File("/some/file/test3", size=1000, events=100)
        newFile.addRun(Run(1, *range(9, 15)))
        newFile.setLocation('T2_CH_CERN')
        multipleFilesFileset.addFile(newFile)
        multipleFilesFileset.commit()

        time.sleep(2)

        jobGroups = jobFactory(periodic_harvest_interval=2)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            ll = LumiList(compactList={1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                                       2: [[1, 2], [4, 7], [3, 3], [8, 8]]})
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                for lumi in range(lumiPair[0], lumiPair[1] + 1):
                    self.assertTrue((run, lumi) in ll, "All of %s not in %s" % (lumiPair, ll))

        harvestingWorkflowSib = Workflow(spec="spec.xml",
                                         owner="hufnagel",
                                         name="TestWorkflowSib",
                                         task="TestSib")
        harvestingWorkflowSib.create()

        harvestSubSib = Subscription(fileset=multipleFilesFileset,
                                     workflow=harvestingWorkflowSib,
                                     split_algo="Harvest",
                                     type="Harvesting")
        harvestSubSib.create()

        jobFactorySib = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSubSib)

        multipleFilesFileset.markOpen(False)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 0, "A single job group was created")

        self.finishJobs(jobGroups, harvestSub)

        jobGroups = jobFactorySib(periodic_harvest_sibling=True)
        self.assertEqual(len(jobGroups), 1, "A single job group was not created")
        self.assertEqual(len(jobGroups[0].getJobs()), 4, "Four jobs were not created")

        for job in jobGroups[0].getJobs():
            runs = job['mask'].getRunAndLumis()
            self.assertEqual(len(runs), 1, "Job has more than one run configured")
            ll = LumiList(compactList={1: [[1, 1], [3, 7], [2, 2], [8, 8], [9, 14]],
                                       2: [[1, 2], [4, 7], [3, 3], [8, 8]]})
            run = runs.keys()[0]
            for lumiPair in runs[run]:
                for lumi in range(lumiPair[0], lumiPair[1] + 1):
                    self.assertTrue((run, lumi) in ll, "All of %s not in %s" % (lumiPair, ll))

    def testMultiRunHarvesting(self):
        """
        _testMultiRunHarvesting_

        Provided a fileset with a couple of files and different runs, create a
        single job for all the runs at a specific location, which also adds a
        baggage to the job (True) which is later on looked up by SetupCMSSWPSet.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(dqmHarvestUnit="multiRun")
        self.assertEqual(len(jobGroups), 1)

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 1)
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertTrue(getattr(baggage, "multiRun", False), "It's supposed to be a multiRun job")
                self.assertEqual(getattr(baggage, "runLimits", ""), "-1-6")

    def testByRunHarvesting(self):
        """
        _testByRunHarvesting_
        Provided a fileset with a couple of files and 4 different runs, create
        one single job per run and location.
        The multiRun baggage should be false in this case.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True, "Fileset should be open!")

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory()
        self.assertEqual(len(jobGroups), 1, "Should have created 1 job group")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs")
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")

    def testByRunAndRunWhitelist(self):
        """
        _testByRunAndRunWhitelist_

        Create harvesting jobs by run for the runs provided in the RunWhitelist
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro",
                                      name="TestWorkflow", task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow,
                                  split_algo="Harvest", type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 2)

    def testByRunAndRunBlacklist(self):
        """
        _testByRunAndRunWhitelist_

        Create harvesting jobs by run for the runs provided in the RunWhitelist
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro",
                                      name="TestWorkflow", task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow,
                                  split_algo="Harvest", type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 3)

Ejemplo n.º 5

Mostrar archivo

Archivo: Periodic_t.py Proyecto: zhiwenuil/WMCore

class PeriodicTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Create a single subscription with one file.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        
        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", seName = "somese.cern.ch")
        locationAction.execute(siteName = "site2", seName = "otherse.cern.ch")
        
        self.testFileset = Fileset(name = "TestFileset1")
        self.testFileset.create()
        
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test" )
        testWorkflow.create()
        self.testSubscription = Subscription(fileset = self.testFileset,
                                             workflow = testWorkflow,
                                             split_algo = "Periodic",
                                             type = "Processing")
        self.testSubscription.create()
        return
    
    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return            

    def injectFile(self):
        """
        _injectFile_

        Inject a file into the periodic splitting input fileset.
        """
        testFile = File(lfn = "/this/is/a/lfn%s" % time.time(), size = 1000,
                        events = 100, locations = set(["somese.cern.ch"]))
        testFile.create()
        self.testFileset.addFile(testFile)    
        self.testFileset.commit()

        return

    def verifyFiles(self, wmbsJob):
        """
        _verifyFiles_

        Verify that the input files for the job are the same as the files in the
        input fileset.
        """
        inputFiles = wmbsJob.getFiles()
        filesetFiles = self.testFileset.getFiles()

        for inputFile in inputFiles:
            assert inputFile in filesetFiles, \
                   "ERROR: Unknown file: %s" % inputFile
            filesetFiles.remove(inputFile)

        assert len(filesetFiles) == 0, \
               "ERROR: Not all files included in job."
                
        return
    
    def testPeriodicSplitting(self):
        """
        _testPeriodiciSplitting_

        Manipulate the splitting algorithm to test the corner cases.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.testSubscription)

        # First pass: no jobs exist.  The algorithm should create a job
        # containing all available files.
        self.injectFile()
        jobGroups = jobFactory(job_period = 99999999999)

        assert len(jobGroups) == 1, \
               "ERROR: Wrong number of job groups returned: %s" % len(jobGroups)

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: Jobgroup has wrong number of jobs: %s" % len(jobGroups[0].jobs)

        wmbsJob = jobGroups[0].jobs.pop()
        self.verifyFiles(wmbsJob)

        # Verify that no jobs are generated as the previously issued job has not
        # completed yet.
        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 1)    

        assert len(moreJobGroups) == 0, \
               "ERROR: No jobgroups should be returned."

        # Complete the job so that the splitting algorithm will generate
        # another job.
        wmbsJob["state"] = "cleanout"
        wmbsJob["oldstate"] = "new"
        wmbsJob["couch_record"] = "somejive"
        wmbsJob["retry_count"] = 0
        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateDAO.execute([wmbsJob])

        # Verify that no jobs will be generated if the period has not yet
        # expried.
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 999999999999)

        assert len(moreJobGroups) == 0, \
               "ERROR: No jobgroups should be returned."

        # Verify that a job will be generated if the period has expired.
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period = 1)

        assert len(jobGroups) == 1, \
               "ERROR: Wrong number of job groups returned: %s" % len(jobGroups)

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: Jobgroup has wrong number of jobs: %s" % len(jobGroups[0].jobs)

        self.verifyFiles(jobGroups[0].jobs.pop())

        # Verify that no jobs will be generated in the case that a periodic job
        # is still running and the fileset has been closed.
        self.testFileset.markOpen(False)
        time.sleep(5)
        self.injectFile()
        jobGroups = jobFactory(job_period = 1)

        assert len(jobGroups) == 0, \
               "ERROR: Wrong number of job groups returned: %s" % len(jobGroups)

        # Complete the outstanding job.
        wmbsJob["state"] = "cleanout"
        wmbsJob["oldstate"] = "new"
        wmbsJob["couch_record"] = "somejive"
        wmbsJob["retry_count"] = 0
        changeStateDAO.execute([wmbsJob])

        # Verify that when the input fileset is closed and all periodic jobs
        # are complete a job will not be generated.
        self.injectFile()
        jobGroups = jobFactory(job_period = 99999999999)

        assert len(jobGroups) == 0, \
               "ERROR: Wrong number of job groups returned: %s" % len(jobGroups)

        # Verify that after the final job is complete no more jobs are generated.
        wmbsJob["state"] = "cleanout"
        wmbsJob["oldstate"] = "new"
        wmbsJob["couch_record"] = "somejive"
        wmbsJob["retry_count"] = 0
        changeStateDAO.execute([wmbsJob])

        time.sleep(5)
        self.injectFile()
        moreJobGroups = jobFactory(job_period = 1)

        assert len(moreJobGroups) == 0, \
               "ERROR: No jobgroups should be returned."

        return