Python Fileset.addFile Examples, WMCore.DataStructs.Fileset.Fileset.addFile Python Examples

Example #1

0

Show file

File: MergeBySize.py Project: AndresTanasijczuk/WMCore

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Implement merge algorithm for the subscription provided

        """
        fileset = list(self.subscription.availableFiles())

        mergeSize = int(kwargs['merge_size'])
        overflow  = bool(kwargs.get('all_files', False))
        fileset.sort()

        accumSize = 0
        jobFiles = Fileset()
        locationDict = self.sortByLocation()
        for location in locationDict:
            baseName = makeUUID()
            self.newGroup()
            for f in locationDict[location]:
                accumSize += f['size']
                jobFiles.addFile(f)
                if accumSize >= mergeSize:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                    accumSize = 0
                    jobFiles = Fileset()

            if len(jobFiles) > 0:
                if overflow:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)

Example #2

0

Show file

File: EventAwareLumiBased_t.py Project: ticoann/WMCore

    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, nEventsPerFile = 100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'malpaquet')
                testFileset.addFile(newFile)


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "EventAwareLumiBased",
                                         type = "Processing")

        return testSubscription

Example #3

0

Show file

    def execute(self, *args, **kwargs):  #pylint: disable=unused-argument

        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name="MCFakeFileSet")
        newFile = File("MCFakeFile", size=1000, events=totalevents)
        newFile.setLocation(self.getListOfSites())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)

Example #4

0

Show file

File: EventBased_t.py Project: vytjan/WMCore

    def generateFakeMCFile(self,
                           numEvents=100,
                           firstEvent=1,
                           lastEvent=100,
                           firstLumi=1,
                           lastLumi=10,
                           existingSub=None):
        # MC comes with only one MCFakeFile
        newFile = File("MCFakeFileTest", size=1000, events=numEvents)
        newFile.setLocation('se01')
        if firstLumi == lastLumi:
            newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        else:
            newFile.addRun(Run(1, *range(firstLumi, lastLumi)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent

        if existingSub is None:
            singleMCFileset = Fileset(name="MCTestFileset")
            singleMCFileset.addFile(newFile)
            testWorkflow = Workflow()
            existingSub = Subscription(fileset=singleMCFileset,
                                       workflow=testWorkflow,
                                       split_algo="EventBased",
                                       type="Production")
        else:
            existingSub['fileset'].addFile(newFile)

        return existingSub

Example #5

0

Show file

File: JobFactory_t.py Project: BrunoCoimbra/WMCore

    def testMetaData(self):
        """
        _testMetaData_

        Make sure that the workflow name, task, owner and white and black lists
        make it into each job object.
        """
        testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                                name = "TestWorkflow", task = "TestTask")

        testFileset = Fileset(name = "TestFileset")
        testFile = File(lfn = "someLFN")
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        split_algo = "FileBased")

        myJobFactory = JobFactory(subscription = testSubscription)
        testJobGroups =  myJobFactory(siteWhitelist = ["site1"], siteBlacklist = ["site2"])
        self.assertTrue(len(testJobGroups) > 0)

        for testJobGroup in testJobGroups:
            self.assertTrue(len(testJobGroup.jobs) > 0)
            for job in testJobGroup.jobs:
                self.assertEqual(job["task"], "TestTask", "Error: Task is wrong.")
                self.assertEqual(job["workflow"], "TestWorkflow", "Error: Workflow is wrong.")
                self.assertEqual(job["owner"], "Steve", "Error: Owner is wrong.")
        return

Example #6

0

Show file

    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        if hasattr(self.config.Sites, 'available'):
            newFile.setLocation(self.config.Sites.available)
        else:
            sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                              "cert":self.config.TaskWorker.cmscert})
            newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFackBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)

Example #7

0

Show file

File: ADSJobFactory.py Project: PerilousApricot/CRAB2

    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(analysisDataset = self.inputDataset(),
                                        retriveList = [ 'retrive_block',
                                                        'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [ wmbsFile['locations'].add(x) for x in blocks[block] ]
            wmbsFile['block'] = block
            thefiles.addFile(
                wmbsFile
                )


        work = Workflow()
        subs = Subscription(
            fileset = thefiles,
            workflow = work,
            split_algo = 'FileBased',
            type = "Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job = self.splitSize)



        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [ jobDef['SENames'].extend(list(x['locations']))
              for x  in job.listFiles() ]
            jobDefs.append(jobDef)


        return jobDefs

Example #8

0

Show file

File: EventAwareLumiByWork_t.py Project: todor-ivanov/WMCore

    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # Fail single lumis with more than 800 events and put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                               job_time_limit=9600, performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        for job in jobs:
            self.assertTrue(job['failedOnCreation'])
            self.assertIn(' with too many events 1000 and it woud take 12000 sec to run', job['failedReason'])

        return

Example #9

0

Show file

File: EventAwareLumiBased_t.py Project: lucacopa/WMCore

    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn = "/this/is/file1", size = 1000, events = 800)
        fileB = File(lfn = "/this/is/file2", size = 1000, events = 400)
        fileC = File(lfn = "/this/is/file3", size = 1000, events = 500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name = 'Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "EventAwareLumiBased",
                                        type = "Processing")
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               events_per_job = 850,
                               runs = ['1', '2', '4'],
                               lumis = ['10,14', '20,21', '40,41'],
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})

Example #10

0

Show file

File: EventAwareLumiBased_t.py Project: ticoann/WMCore

    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i),
                                          nEventsPerFile, i, lumisPerFile,
                                          'malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        return testSubscription

Example #11

0

Show file

    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1,
                                    "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1,
                                    "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(0, 3):
            self.assertTrue(jobs[i]['failedOnCreation'],
                            "It should have been marked as failed")

            runNums = jobs[i]['mask']['runAndLumis'].keys()
            self.assertEqual(len(runNums), 1)

            lumiNums = jobs[i]['mask']['runAndLumis'].values()[0]
            self.assertEqual(len(lumiNums), 1)

            finalLumi = []
            for pair in lumiNums:
                finalLumi.extend(range(pair[0], pair[1] + 1))
            self.assertEqual(len(finalLumi), 1)

            self.assertEqual(
                jobs[i]['failedReason'],
                "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run"
                % (i + 1, finalLumi[0], runNums[0]))

        return

Example #12

0

Show file

    def oneHundredFiles(self, splittingAlgo="EventBased", jobType="Processing"):
        """
        _oneHundredFiles_
        
        Generate a WMBS data stack representing 100 files for job splitter
        testing
        
        """
        fileset1 = Fileset(name="EventBasedFiles1")
        for i in range(0, 100):
            f = File(
                "/store/MultipleFileSplit%s.root" % i, 1000, 100, 10 + i, 12312  # lfn  # size  # events  # run  # lumi
            )
            f["locations"].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1, workflow=work, split_algo=splittingAlgo, type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)
        # for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addSeeder("RandomSeeder", **self.seedlistForRandom)
        self.manager.addSeeder("RunAndLumiSeeder")

        return jobs

Example #13

0

Show file

File: Fileset_t.py Project: stuartw/WMCore

 def testCommit(self):
     """
         Testcase for the commit method of the Fileset class
         
     """
     localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
     fsSize = len(localTestFileSet.getFiles(type = "lfn"))
     #Dummy file to test
     fileTestCommit = File('/tmp/filetestcommit',0000,1,1)
     #File is added to the newfiles attribute of localTestFileSet
     localTestFileSet.addFile(fileTestCommit)
     assert fsSize == len(localTestFileSet.getFiles(type = "lfn")) - 1, 'file not added'\
             'correctly to test fileset'
     newfilestemp = localTestFileSet.newfiles
     assert fileTestCommit in newfilestemp, 'test file not in the new files'\
             'list' 
     #After commit, dummy file is supposed to move from newfiles to files
     localTestFileSet.commit()
     #First, testing if the new file is present at file set object attribute of the Fileset object
     
     assert newfilestemp.issubset(localTestFileSet.files), 'Test file not ' \
             'present at fileset.files - fileset.commit ' \
             'not working properly' 
     #Second, testing if the newfile set object attribute is empty
     assert localTestFileSet.newfiles == set(), \
             'Test file not present at fileset.newfiles ' \
             '- fileset.commit not working properly'

Example #14

0

Show file

File: MakeFakeFileSet.py Project: belforte/CRABServer

    def execute(self, *args, **kwargs): #pylint: disable=unused-argument

        # since https://github.com/dmwm/CRABServer/issues/5633 totalunits can be a float
        # but that would confuse WMCore, therefore cast to int
        totalevents = int(kwargs['task']['tm_totalunits'])
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        newFile.setLocation(self.getListOfSites())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)

Example #15

0

Show file

File: MakeFakeFileSet.py Project: HassenRiahi/CRABServer

    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        # Set a default of 100 events per lumi.  This is set as a task
        # property, as the splitting considers it independently of the file
        # information provided by the fake dataset.
        if not kwargs['task']['tm_events_per_lumi']:
            kwargs['task']['tm_events_per_lumi'] = 100

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                          "cert":self.config.TaskWorker.cmscert})
        newFile.setLocation(sbj.getAllCMSNames())
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFakeBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)

Example #16

0

Show file

File: Fileset_t.py Project: todor-ivanov/WMCore

    def testCommit(self):
        """
            Testcase for the commit method of the Fileset class

        """
        localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
        fsSize = len(localTestFileSet.getFiles(type="lfn"))
        #Dummy file to test
        fileTestCommit = File('/tmp/filetestcommit', 0000, 1, 1)
        #File is added to the newfiles attribute of localTestFileSet
        localTestFileSet.addFile(fileTestCommit)
        assert fsSize == len(localTestFileSet.getFiles(type = "lfn")) - 1, 'file not added'\
                'correctly to test fileset'
        newfilestemp = localTestFileSet.newfiles
        assert fileTestCommit in newfilestemp, 'test file not in the new files'\
                'list'
        #After commit, dummy file is supposed to move from newfiles to files
        localTestFileSet.commit()
        #First, testing if the new file is present at file set object attribute of the Fileset object

        assert newfilestemp.issubset(localTestFileSet.files), 'Test file not ' \
                'present at fileset.files - fileset.commit ' \
                'not working properly'
        #Second, testing if the newfile set object attribute is empty
        assert localTestFileSet.newfiles == set(), \
                'Test file not present at fileset.newfiles ' \
                '- fileset.commit not working properly'

Example #17

0

Show file

File: EventAwareLumiByWork_t.py Project: alexanderrichards/WMCore

    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # Fail single lumis with more than 800 events and put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                               max_events_per_lumi=800, performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        for job in jobs:
            self.assertTrue(job['failedOnCreation'])
            self.assertIn("Too many (estimated) events (1000.0) in", job['failedReason'])

        return

Example #18

0

Show file

File: EventAwareLumiBased_t.py Project: todor-ivanov/WMCore

    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {4: [[41, 41]]})

Example #19

0

Show file

File: ADSJobFactory.py Project: dmwm/ProdCommon

    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it

        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s" % self.splitSize)
        logging.debug("AllowedSites = %s" % self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(
            analysisDataset=self.inputDataset(),
            retriveList=['retrive_block', 'retrive_run'])

        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if not blocks.has_key(block):
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            [wmbsFile['locations'].add(x) for x in blocks[block]]
            wmbsFile['block'] = block
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(fileset=thefiles,
                            workflow=work,
                            split_algo='FileBased',
                            type="Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job=self.splitSize)

        jobDefs = []
        for job in jobs.jobs:
            #job.mask.setMaxAndSkipEvents(-1, 0)
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            [
                jobDef['SENames'].extend(list(x['locations']))
                for x in job.listFiles()
            ]
            jobDefs.append(jobDef)

        return jobDefs

Example #20

0

Show file

    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(0, 3):
            self.assertTrue(jobs[i]['failedOnCreation'], "It should have been marked as failed")

            runNums = jobs[i]['mask']['runAndLumis'].keys()
            self.assertEqual(len(runNums), 1)

            lumiNums = jobs[i]['mask']['runAndLumis'].values()[0]
            self.assertEqual(len(lumiNums), 1)

            finalLumi = []
            for pair in lumiNums:
                finalLumi.extend(range(pair[0], pair[1] + 1))
            self.assertEqual(len(finalLumi), 1)

            self.assertEqual(jobs[i]['failedReason'],
                             "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run" % (
                             i + 1, finalLumi[0], runNums[0]))

        return

Example #21

0

Show file

    def execute(self, *args, **kwargs):
        self.logger.info(
            "Data discovery and splitting for %s using user-provided files" %
            kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_arguments'].get('userfiles')
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs[
                    'task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            self.logger.error("Setting %s as failed: %s" %
                              (kwargs['task']['tm_taskname'], msg))
            configreq = {
                'workflow': kwargs['task']['tm_taskname'],
                'status': "FAILED",
                'subresource': 'failure',
                'failure': b64encode(msg)
            }
            self.server.post(self.resturi, data=urllib.urlencode(configreq))
            raise StopHandler(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            sbj = SiteDBJSON({
                "key": self.config.TaskWorker.cmskey,
                "cert": self.config.TaskWorker.cmscert
            })
            locations = sbj.getAllCMSNames()

        userFileset = Fileset(name=kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." %
                         len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size=1000, events=1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task=kwargs['task'], result=userFileset)

Example #22

0

Show file

File: JobFactory_t.py Project: tsarangi/WMCore

    def testProductionRunNumber(self):
        """
        _testProductionRunNumber_

        Verify that jobs created by production subscritpions have the correct
        run number is their job mask.  Also verify that non-production
        subscriptions don't have modified run numbers.
        """
        testWorkflow = Workflow(spec="spec.pkl",
                                owner="Steve",
                                name="TestWorkflow",
                                task="TestTask")

        testFileset = Fileset(name="TestFileset")
        testFile = File(lfn="someLFN")
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased",
                                        type="Production")

        myJobFactory = JobFactory(subscription=testSubscription)
        testJobGroups = myJobFactory()

        self.assertTrue(len(testJobGroups) > 0)
        for testJobGroup in testJobGroups:
            self.assertTrue(len(testJobGroup.jobs) > 0)
            for job in testJobGroup.jobs:
                self.assertEqual(job["mask"]["FirstRun"], 1,
                                 "Error: First run is wrong.")
                self.assertEqual(job["mask"]["LastRun"], 1,
                                 "Error: Last run is wrong.")

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased",
                                        type="Processing")

        myJobFactory = JobFactory(subscription=testSubscription)
        testJobGroups = myJobFactory()

        for testJobGroup in testJobGroups:
            for job in testJobGroup.jobs:
                self.assertEqual(job["mask"]["FirstRun"], None,
                                 "Error: First run is wrong.")
                self.assertEqual(job["mask"]["LastRun"], None,
                                 "Error: Last run is wrong.")

        return

Example #23

0

Show file

    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        #Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1,
                                    "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1,
                                    "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        #Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        #and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               max_events_per_lumi=800,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(1, 4):
            self.assertTrue(
                jobs[i - 1]['failedOnCreation'],
                "The job processing the second file should me marked for failure"
            )
            self.assertEqual(
                jobs[i - 1]['failedReason'],
                "File /this/is/file%d has too many events (1000) in 1 lumi(s)"
                % i, "The reason for the failure is not accurate")

        return

Example #24

0

Show file

File: EventBased_t.py Project: stuartw/WMCore

 def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100, firstLumi=1, lastLumi=10):
     # MC comes with only one MCFakeFile
     singleMCFileset = Fileset(name="MCTestFileset")
     newFile = File("MCFakeFileTest", size=1000, events=numEvents)
     newFile.setLocation("se01")
     newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
     newFile["first_event"] = firstEvent
     newFile["last_event"] = lastEvent
     testWorkflow = Workflow()
     singleMCFileset.addFile(newFile)
     singleMCFileSubscription = Subscription(
         fileset=singleMCFileset, workflow=testWorkflow, split_algo="EventBased", type="Production"
     )
     return singleMCFileSubscription

Example #25

0

Show file

    def testHardLimitSplitting(self):
        """
        _testHardLimitSplitting_

        Test that we can specify a event limit, the
        algorithm shall take single lumi files with more events than the limit
        and mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Settings are to split on job boundaries, to fail single lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        # One job should be failed, the rest should be fine
        for jobNum in (0, 1, 3, 4):
            self.assertFalse(jobs[jobNum].get('failedOnCreation'))
        self.assertTrue(jobs[2]['failedOnCreation'])
        self.assertEqual(
            jobs[2]['failedReason'],
            'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run'
        )

        return

Example #26

0

Show file

 def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                        lastEvent = 100, firstLumi = 1, lastLumi = 10):
     #MC comes with only one MCFakeFile
     singleMCFileset = Fileset(name = "MCTestFileset")
     newFile = File("MCFakeFileTest", size = 1000, events = numEvents)
     newFile.setLocation('se01')
     newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
     newFile["first_event"] = firstEvent
     newFile["last_event"] = lastEvent
     testWorkflow = Workflow()
     singleMCFileset.addFile(newFile)
     singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                             workflow = testWorkflow,
                                             split_algo = "EventBased",
                                             type = "Production")
     return singleMCFileSubscription

Example #27

0

Show file

File: JobFactory_t.py Project: zhiwenuil/WMCore

    def testProductionRunNumber(self):
        """
        _testProductionRunNumber_
        
        Verify that jobs created by production subscritpions have the correct
        run number is their job mask.  Also verify that non-production
        subscriptions don't have modified run numbers.
        """
        testWorkflow = Workflow(spec = "spec.pkl", owner = "Steve",
                                name = "TestWorkflow", task = "TestTask")
    
        testFileset = Fileset(name = "TestFileset")
        testFile = File(lfn = "someLFN")
        testFileset.addFile(testFile)
        testFileset.commit()
        
        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        split_algo = "FileBased",
                                        type = "Production")
    
        myJobFactory = JobFactory(subscription = testSubscription)
        testJobGroups =  myJobFactory()
    
        for testJobGroup in testJobGroups:
            for job in testJobGroup.jobs:
                assert job["mask"]["FirstRun"] == 1, \
                       "Error: First run is wrong."
                assert job["mask"]["LastRun"] == 1, \
                       "Error: Last run is wrong."

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        split_algo = "FileBased",
                                        type = "Processing")
    
        myJobFactory = JobFactory(subscription = testSubscription)
        testJobGroups =  myJobFactory()
        
        for testJobGroup in testJobGroups:
            for job in testJobGroup.jobs:
                assert job["mask"]["FirstRun"] == None, \
                       "Error: First run is wrong."
                assert job["mask"]["LastRun"] == None, \
                       "Error: Last run is wrong."

        return

Example #28

0

Show file

File: UserDataDiscovery.py Project: mialiu149/CRABServer

    def execute(self, *args, **kwargs):
        self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

        if 'tm_user_files' in kwargs['task'] and kwargs['task']['tm_user_files']:
            userfiles = kwargs['task']['tm_user_files']
        else: ## For backward compatibility only.
            userfiles = kwargs['task']['tm_arguments'].get('userfiles')
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            self.logger.error("Setting %s as failed: %s" % (kwargs['task']['tm_taskname'], msg))
            configreq = {'workflow': kwargs['task']['tm_taskname'],
                         'status': "FAILED",
                         'subresource': 'failure',
                         'failure': b64encode(msg)}
            self.server.post(self.resturi, data = urllib.urlencode(configreq))
            raise StopHandler(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            sbj = SiteDBJSON({"key":self.config.TaskWorker.cmskey,
                              "cert":self.config.TaskWorker.cmscert})
            locations = sbj.getAllCMSNames()

        userFileset = Fileset(name = kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." % len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size = 1000, events = 1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task = kwargs['task'], result = userFileset)

Example #29

0

Show file

File: UserDataDiscovery.py Project: sharmaprajesh/CRABServer

    def execute(self, *args, **kwargs):
        self.logger.info(
            "Data discovery and splitting for %s using user-provided files" %
            kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_user_files']
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs[
                    'task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            raise TaskWorkerException(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            with self.config.TaskWorker.envForCMSWEB:
                configDict = {
                    "cacheduration": 1,
                    "pycurl": True
                }  # cache duration is in hours
                resourceCatalog = CRIC(logger=self.logger,
                                       configDict=configDict)
                locations = resourceCatalog.getAllPSNs()

        userFileset = Fileset(name=kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." %
                         len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size=1000, events=1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task=kwargs['task'], result=userFileset)

Example #30

0

Show file

    def getFileset(self):
        """
        Get a fileset based on the task

        """

        fileset = Fileset(name='Merge%s' % (type))

        for i in range(0, random.randint(15, 25)):
            # Use the testDir to generate a random lfn
            inpFile = File(lfn="%s/%s.root" % (self.testDir, makeUUID()),
                           size=random.randint(200000, 1000000),
                           events=random.randint(1000, 2000))
            inpFile.setLocation('Megiddo')
            fileset.addFile(inpFile)

        return fileset

Example #31

0

Show file

File: Runtime_t.py Project: vkuznet/WMCore

    def getFileset(self):
        """
        Get a fileset based on the task

        """

        fileset = Fileset(name='Merge%s' % (type))

        for i in range(0, random.randint(15, 25)):
            # Use the testDir to generate a random lfn
            inpFile = File(lfn="%s/%s.root" % (self.testDir, makeUUID()),
                           size=random.randint(200000, 1000000),
                           events=random.randint(1000, 2000))
            inpFile.setLocation('Megiddo')
            fileset.addFile(inpFile)

        return fileset

Example #32

0

Show file

    def testE_HardLimitSpltting(self):
        """
        _testE_HardLimitSplitting_

        Test that we can specify a event limit, the
        algorithm shall take single lumi files with more events than the limit
        and mark them for failure
        """
        splitter = SplitterFactory()

        #Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        #Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        #and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               max_events_per_lumi=800,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        self.assertTrue(
            jobs[3]['failedOnCreation'],
            "The job processing the second file should me marked for failure")
        self.assertEqual(
            jobs[3]['failedReason'],
            "File /this/is/file2 has too many events (1000) in 1 lumi(s)",
            "The reason for the failure is not accurate")

        return

Example #33

0

Show file

    def testI_DisableHardLimitSplitting(self):
        """
        _testI_DisableHardLimitSplitting_

        Test that we can bypass the job time limit when allowCreationFailure is
        set to False. The algorithm shall take single lumi files with time per
        lumi greater than the job time limit but not mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               allowCreationFailure=False,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        failedJobs = [
            job for job in jobs if job.get('failedOnCreation', False)
        ]
        self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")

        return

Example #34

0

Show file

File: EventAwareLumiBased_t.py Project: alexanderrichards/WMCore

    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               max_events_per_lumi=800,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(1, 4):
            self.assertTrue(jobs[i - 1]['failedOnCreation'],
                            "The job processing the second file should me marked for failure")
            self.assertEqual(jobs[i - 1]['failedReason'],
                             "File /this/is/file%d has too many events (1000) in 1 lumi(s)" % i,
                             "The reason for the failure is not accurate")

        return

Example #35

0

Show file

File: MakeFakeFileSet.py Project: HassenRiahi/CAFTaskWorker

    def execute(self, *args, **kwargs):

        totalevents = kwargs['task']['tm_totalunits']
        firstEvent = 1
        lastEvent = totalevents
        firstLumi = 1
        lastLumi = 10

        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCFakeFileSet")
        newFile = File("MCFakeFile", size = 1000, events = totalevents)
        newFile.setLocation(self.config.Sites.available)
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["block"] = 'MCFackBlock'
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        singleMCFileset.addFile(newFile)

        return Result(task=kwargs['task'], result=singleMCFileset)

Example #36

0

Show file

    def testE_HardLimitSpltting(self):
        """
        _testE_HardLimitSplitting_

        Test that we can specify a event limit, the
        algorithm shall take single lumi files with more events than the limit
        and mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        failedJobs = [job for job in jobs if job.get('failedOnCreation', False)]
        self.assertEqual(len(failedJobs), 1)
        self.assertEqual(failedJobs[0]['failedReason'],
                         'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run')

        return

Example #37

0

Show file

File: EventAwareLumiBased_t.py Project: lucacopa/WMCore

    def testE_HardLimitSpltting(self):
        """
        _testE_HardLimitSplitting_

        Test that we can specify a event limit, the
        algorithm shall take single lumi files with more events than the limit
        and mark them for failure
        """
        splitter = SplitterFactory()

        #Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name = "FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "EventAwareLumiBased",
                                        type = "Processing")
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        #Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        #and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries = True,
                               splitOnRun = True,
                               events_per_job = 550,
                               max_events_per_lumi = 800,
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        self.assertTrue(jobs[3]['failedOnCreation'], "The job processing the second file should me marked for failure")
        self.assertEqual(jobs[3]['failedReason'], "File /this/is/file2 has too many events (1000) in 1 lumi(s)",
                          "The reason for the failure is not accurate")

        return

Example #38

0

Show file

File: EventAwareLumiBased_t.py Project: vkuznet/WMCore

    def testI_DisableHardLimitSplitting(self):
        """
        _testI_DisableHardLimitSplitting_

        Test that we can bypass the job time limit when allowCreationFailure is
        set to False. The algorithm shall take single lumi files with time per
        lumi greater than the job time limit but not mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on job boundaries, to fail sing lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               allowCreationFailure=False,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        failedJobs = [job for job in jobs if job.get('failedOnCreation', False)]
        self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")

        return

Example #39

0

Show file

File: JobFactory_t.py Project: tsarangi/WMCore

    def testMetaData(self):
        """
        _testMetaData_

        Make sure that the workflow name, task, owner and white and black lists
        make it into each job object.
        """
        testWorkflow = Workflow(spec="spec.pkl",
                                owner="Steve",
                                name="TestWorkflow",
                                task="TestTask")

        testFileset = Fileset(name="TestFileset")
        testFile = File(lfn="someLFN")
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")

        myJobFactory = JobFactory(subscription=testSubscription)
        testJobGroups = myJobFactory(siteWhitelist=["site1"],
                                     siteBlacklist=["site2"])
        self.assertTrue(len(testJobGroups) > 0)

        for testJobGroup in testJobGroups:
            self.assertTrue(len(testJobGroup.jobs) > 0)
            for job in testJobGroup.jobs:
                self.assertEqual(job["task"], "TestTask",
                                 "Error: Task is wrong.")
                self.assertEqual(job["workflow"], "TestWorkflow",
                                 "Error: Workflow is wrong.")
                self.assertEqual(job["owner"], "Steve",
                                 "Error: Owner is wrong.")
                self.assertEqual(job["siteWhitelist"], ["site1"],
                                 "Error: Site white list is wrong.")
                self.assertEqual(job["siteBlacklist"], ["site2"],
                                 "Error: Site black list is wrong.")
        return

Example #40

0

Show file

File: UserDataDiscovery.py Project: belforte/CRABServer

    def execute(self, *args, **kwargs):
        self.logger.info("Data discovery and splitting for %s using user-provided files" % kwargs['task']['tm_taskname'])

        userfiles = kwargs['task']['tm_user_files']
        splitting = kwargs['task']['tm_split_algo']
        total_units = kwargs['task']['tm_totalunits']
        if not userfiles or splitting != 'FileBased':
            if not userfiles:
                msg = "No files specified to process for task %s." % kwargs['task']['tm_taskname']
            if splitting != 'FileBased':
                msg = "Data.splitting must be set to 'FileBased' when using a custom set of files."
            raise TaskWorkerException(msg)

        if hasattr(self.config.Sites, 'available'):
            locations = self.config.Sites.available
        else:
            with self.config.TaskWorker.envForCMSWEB :
                configDict = {"cacheduration": 1, "pycurl": True} # cache duration is in hours
                resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
                locations = resourceCatalog.getAllPSNs()

        userFileset = Fileset(name = kwargs['task']['tm_taskname'])
        self.logger.info("There are %d files specified by the user." % len(userfiles))
        if total_units > 0:
            self.logger.info("Will run over the first %d files." % total_units)
        file_counter = 0
        for userfile, idx in zip(userfiles, range(len(userfiles))):
            newFile = File(userfile, size = 1000, events = 1)
            newFile.setLocation(locations)
            newFile.addRun(Run(1, idx))
            newFile["block"] = 'UserFilesFakeBlock'
            newFile["first_event"] = 1
            newFile["last_event"] = 2
            userFileset.addFile(newFile)
            file_counter += 1
            if total_units > 0 and file_counter >= total_units:
                break

        return Result(task = kwargs['task'], result = userFileset)

Example #41

0

Show file

File: EventAwareLumiByWork_t.py Project: alexanderrichards/WMCore

    def testHardLimitSplitting(self):
        """
        _testHardLimitSplitting_

        Test that we can specify a event limit, the
        algorithm shall take single lumi files with more events than the limit
        and mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # Settings are to split on job boundaries, to fail single lumis with more than 800 events
        # and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, splitOnRun=True, events_per_job=550,
                               max_events_per_lumi=800, performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        # One job should be failed, the rest should be fine
        for jobNum in (0, 1, 3, 4):
            self.assertFalse(jobs[jobNum].get('failedOnCreation'))
        self.assertTrue(jobs[2]['failedOnCreation'])
        self.assertEqual(jobs[2]['failedReason'], 'Too many (estimated) events (1000.0) in run 1, lumi 1')

        return

Example #42

0

Show file

    def oneHundredFiles(self,
                        splittingAlgo="EventBased",
                        jobType="Processing"):
        """
        _oneHundredFiles_

        Generate a WMBS data stack representing 100 files for job splitter
        testing

        """
        fileset1 = Fileset(name='EventBasedFiles1')
        for i in range(0, 100):
            f = File(
                "/store/MultipleFileSplit%s.root" % i,  # lfn
                1000,  # size
                100,  # events
                10 + i,  # run
                12312  # lumi
            )
            f['locations'].add("BULLSHIT")

            fileset1.addFile(f)

        work = Workflow()
        subscription1 = Subscription(fileset=fileset1,
                                     workflow=work,
                                     split_algo=splittingAlgo,
                                     type=jobType)
        splitter = SplitterFactory()
        jobfactory = splitter(subscription1)
        jobs = jobfactory(events_per_job=100)
        #for jobGroup in jobs:
        #    yield jobGroup

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return jobs

Example #43

0

Show file

File: LumiBased_t.py Project: vytjan/WMCore

    def createSubscription(self, nFiles, lumisPerFile, twoSites=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i), size=1000, events=100)
            lumis = []
            for lumi in range(lumisPerFile):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.setLocation('blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100)
                lumis = []
                for lumi in range(lumisPerFile):
                    lumis.append(5 + 10 * (i * 100) +
                                 lumi)  #lumis should be different
                newFile.addRun(Run(i, *lumis))
                newFile.setLocation('malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")

        return testSubscription

Example #44

0

Show file

    def createSubscription(self, nFiles, lumisPerFile, twoSites = False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100)
            lumis = []
            for lumi in range(lumisPerFile):
                lumis.append((i * 100) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.setLocation('blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100)
                lumis = []
                for lumi in range(lumisPerFile):
                    lumis.append(5 + 10 * (i * 100) + lumi) #lumis should be different
                newFile.addRun(Run(i, *lumis))
                newFile.setLocation('malpaquet')
                testFileset.addFile(newFile)


        testSubscription  = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")

        return testSubscription

Example #45

0

Show file

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Implement merge algorithm for the subscription provided

        """
        fileset = list(self.subscription.availableFiles())

        mergeSize = int(kwargs['merge_size'])
        overflow = bool(kwargs.get('all_files', False))
        fileset.sort()

        accumSize = 0
        jobFiles = Fileset()
        locationDict = self.sortByLocation()
        for location in locationDict:
            baseName = makeUUID()
            self.newGroup()
            for f in locationDict[location]:
                accumSize += f['size']
                jobFiles.addFile(f)
                if accumSize >= mergeSize:
                    self.newJob(name='%s-%s' %
                                (baseName, len(self.currentGroup.jobs) + 1),
                                files=jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                    accumSize = 0
                    jobFiles = Fileset()

            if len(jobFiles) > 0:
                if overflow:
                    self.newJob(name='%s-%s' %
                                (baseName, len(self.currentGroup.jobs) + 1),
                                files=jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)

Example #46

0

Show file

File: EventBased_t.py Project: vkuznet/WMCore

    def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100,
                           firstLumi=1, lastLumi=10, existingSub=None):
        # MC comes with only one MCFakeFile
        newFile = File("MCFakeFileTest", size=1000, events=numEvents)
        newFile.setLocation('se01')
        if firstLumi == lastLumi:
            newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        else:
            newFile.addRun(Run(1, *range(firstLumi, lastLumi)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent

        if existingSub is None:
            singleMCFileset = Fileset(name="MCTestFileset")
            singleMCFileset.addFile(newFile)
            testWorkflow = Workflow()
            existingSub = Subscription(fileset=singleMCFileset,
                                       workflow=testWorkflow,
                                       split_algo="EventBased",
                                       type="Production")
        else:
            existingSub['fileset'].addFile(newFile)

        return existingSub

Example #47

0

Show file

class FixedDelayTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(i, *[45 + i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45 + i / 3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to do...
        """
        pass

    def testNone(self):
        """
        _testNone_

        Since the time hasn'tpassed, we shouldn't get any jobs back.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=int(time.time()) * 2)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=int(time.time()) * 2)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=int(time.time()) * 2)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=int(time.time()) * 2)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        return

    def testClosed(self):
        """
        _testClosed_
        since the subscriptions are closed and none of the files ahve been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        self.multipleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)

        self.multipleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
        #self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)

    def testAllAcquired(self):
        """
        _testAllAcquired_
        should all return no job groups
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.acquireFiles(
            self.singleFileSubscription.availableFiles())
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(jobGroups, [],
                          "Should have returned a null set: %s" % jobGroups)

        self.multipleFileSubscription.acquireFiles(
            self.multipleFileSubscription.availableFiles())
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.multipleLumiSubscription.acquireFiles(
            self.multipleLumiSubscription.availableFiles())
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.acquireFiles(
            self.singleLumiSubscription.availableFiles())
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_
        since the subscriptions are closed and none of the files ahve been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.multipleFileSubscription.getFileset().markOpen(False)
        self.singleFileSubscription.acquireFiles(
            [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.multipleFileSubscription.getFileset().markOpen(False)
        self.multipleFileSubscription.acquireFiles(
            [self.multipleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.multipleLumiSubscription.getFileset().markOpen(False)
        self.multipleLumiSubscription.acquireFiles(
            [self.multipleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.singleLumiSubscription.getFileset().markOpen(False)
        self.singleLumiSubscription.acquireFiles(
            [self.singleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.assertEquals(len(myfiles), 9)

Example #48

0

Show file

File: FileBased_t.py Project: ticoann/WMCore

class FileBasedTest(unittest.TestCase):
    """
    _FileBasedTest_

    Test file based job splitting.
    """


    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.setLocation('blenheim')
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "FileBased",
                                                   type = "Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to do...
        """
        pass

    def testExactFiles(self):
        """
        _testExactFiles_

        Test file based job splitting when the number of files per job is
        exactly the same as the number of files in the input fileset.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)

        jobGroups = jobFactory(files_per_job = 1)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        return

    def testMoreFiles(self):
        """
        _testMoreFiles_

        Test file based job splitting when the number of files per job is
        greater than the number of files in the input fileset.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)

        jobGroups = jobFactory(files_per_job = 10)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        return

    def test2FileSplit(self):
        """
        _test2FileSplit_

        Test file based job splitting when the number of files per job is
        2, this should result in five jobs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job = 2)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 5, \
               "ERROR: JobFactory didn't create two jobs."

        fileSet = set()
        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "set")) == 2, \
                   "ERROR: Job contains incorrect number of files."

            for file in job.getFiles(type = "lfn"):
                fileSet.add(file)

        assert len(fileSet) == 10, \
               "ERROR: Not all files assinged to job."

        return

    def test3FileSplit(self):
        """
        _test3FileSplit_

        Test file based job splitting when the number of files per job is
        3, this should result in four jobs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job = 3)

        self.assertEqual(len(jobGroups), 1)

        self.assertEqual(len(jobGroups[0].jobs), 4)

        fileList = []
        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "list")) in [3, 1], \
                   "ERROR: Job contains incorrect number of files."

            for file in job.getFiles(type = "lfn"):
                assert file not in fileList, \
                       "ERROR: File duplicated!"
                fileList.append(file)

        self.assertEqual(len(fileList), 10)

        return

Example #49

0

Show file

    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # First test, the optimal settings are 360 events per job. As we have files with 0 events per lumi, this will
        # configure the splitting to a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)

        # One job in one job group with 100 files
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)

Example #50

0

Show file

File: EventAwareLumiBased_t.py Project: ticoann/WMCore

    def testD_NoFileSplitNoHardLimit(self):
        """
        _testD_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        #Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        #First test, the optimal settings are 360 events per job
        #As we have files with 0 events per lumi, this will configure the splitting to
        #a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100,
                         "All 100 files must be in the job")

        #Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 151, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        #Optimal settings are: jobs with 150 events per job
        #This means, the first file must be splitted in 3 lumis per job which would leave room
        #for another lumi in the second job, but the second file has a lumi too big for that
        #The 3rd job only contains the second file, the fourth and fifth job split the third file
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 8, "Eight jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0L: [[0L, 2L]]},
                         "Wrong mask for the first job")
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0L: [[3L, 4L]]},
                         "Wrong mask for the second job")
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1L: [[1L, 1L]]},
                         "Wrong mask for the third job")
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2L: [[4L, 4L]]},
                         "Wrong mask for the fourth job")
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2L: [[5L, 5L]]},
                         "Wrong mask for the fifth job")
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(), {
            3L: [[3L, 3L]],
            4L: [[4L, 4L]],
            5L: [[5L, 5L]]
        }, "Wrong mask for the sixth job")
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6L: [[18L, 19L]]},
                         "Wrong mask for the seventh job")
        self.assertEqual(jobs[7]["mask"].getRunAndLumis(), {6L: [[20L, 20L]]},
                         "Wrong mask for the seventh job")
        #Test interactions of this algorithm with splitOnRun = True
        #Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        #The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")

Example #51

0

Show file

File: Subscription.py Project: dmwm/WMCore-legacy

class Subscription(Pickleable, dict):
    def __init__(self,
                 fileset=None,
                 workflow=None,
                 whitelist=None,
                 blacklist=None,
                 split_algo="FileBased",
                 type="Processing"):
        if fileset == None:
            fileset = Fileset()
        if whitelist == None:
            whitelist = set()
        if blacklist == None:
            blacklist = set()

        self.setdefault('fileset', fileset)
        self.setdefault('workflow', workflow)
        self.setdefault('type', type)

        self.setdefault('split_algo', split_algo)
        self.setdefault('whitelist', whitelist)
        self.setdefault('blacklist', blacklist)

        self.available = Fileset(name=fileset.name, files=fileset.getFiles())

        self.acquired = Fileset(name='acquired')
        self.completed = Fileset(name='completed')
        self.failed = Fileset(name='failed')

    def name(self):
        return self.getWorkflow().name.replace(' ', '') + '_' + \
                    self.getFileset().name.replace(' ', '')

    def getWorkflow(self):
        return self["workflow"]

    def workflowName(self):
        if self["workflow"] == None:
            return "Unknown"
        return self["workflow"].name

    def taskName(self):
        if self['workflow'] == None:
            return "Unknown"
        return self['workflow'].task

    def getFileset(self):
        return self['fileset']

    def acquireFiles(self, files=[], size=1):
        """
        Return the files acquired
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()
        retval = []
        if len(files):
            for i in files:
                # Check each set, instead of elif, just in case something has
                # got out of synch
                if i in self.available.files:
                    self.available.files.remove(i)
                if i in self.failed.files:
                    self.failed.files.remove(i)
                if i in self.completed.files:
                    self.completed.files.remove(i)
                self.acquired.addFile(i)
        else:
            if len(self.available.files) < size or size == 0:
                size = len(self.available.files)
            for i in range(size):
                self.acquired.addFile(self.available.files.pop())

        return self.acquired.listNewFiles()

    def completeFiles(self, files):
        """
        Return the number of files complete
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()
        for i in files:
            # Check each set, instead of elif, just in case something has
            # got out of synch
            if i in self.available.files:
                self.available.files.remove(i)
            if i in self.failed.files:
                self.failed.files.remove(i)
            if i in self.acquired.files:
                self.acquired.files.remove(i)
            self.completed.addFile(i)

    def failFiles(self, files):
        """
        Return the number of files failed
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()
        for i in files:
            # Check each set, instead of elif, just in case something has
            # got out of synch
            if i in self.available.files:
                self.available.files.remove(i)
            if i in self.completed.files:
                self.completed.files.remove(i)
            if i in self.acquired.files:
                self.acquired.files.remove(i)
            self.failed.addFile(i)

    def filesOfStatus(self, status=None):
        """
        _filesOfStatus_

        Return a Set of File objects that are associated with the subscription
        and have a particular status.
        """
        status = status.title()
        if status == 'Available':
            return self.available.getFiles(type='set') - \
            (self.acquiredFiles() | self.completedFiles() | self.failedFiles())
        elif status == 'Acquired':
            return self.acquired.getFiles(type='set')
        elif status == 'Completed':
            return self.completed.getFiles(type='set')
        elif status == 'Failed':
            return self.failed.getFiles(type='set')

    def markLocation(self, location, whitelist=True):
        """
        Add a location to the subscriptions white or black list
        """
        if whitelist:
            self['whitelist'].add(location)
        else:
            self['blacklist'].add(location)

    def availableFiles(self):
        """
        Return a Set of files that are available for processing
        (e.g. not already in use) and at sites that are white listed
        or not black listed
        """
        def locationMagic(files, locations):
            """
            files and locations are sets. method returns the subset of files
            that are at the locations - this is a lot simpler with the database
            """
            magicfiles = set()
            for f in files:
                if len(f['locations'] & locations) > 0:
                    magicfiles.add(f)
            return magicfiles

        files = self.filesOfStatus(status="Available")

        if len(self['whitelist']) > 0:
            # Return files at white listed sites
            return locationMagic(files, self['whitelist'])
        elif len(self['blacklist']) > 0:
            # Return files not at black listed sites
            return files - locationMagic(files, self['blacklist'])
        #Return all files, because you're crazy and just don't care
        return files

    def acquiredFiles(self):
        """
        Set of files marked as acquired.
        """
        return self.filesOfStatus(status="Acquired")

    def completedFiles(self):
        """
        Set of files marked as completed.
        """
        return self.filesOfStatus(status="Completed")

    def failedFiles(self):
        """
        Set of files marked as failed.
        """
        return self.filesOfStatus(status="Failed")

Example #52

0

Show file

File: EventAwareLumiByWork_t.py Project: alexanderrichards/WMCore

    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Test that we can use a run white list to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Split with no breaks
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 4])

        # Re-split with a break on runs
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 3, 4])

        # Re-split with a break on files
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 2, 3])

Example #53

0

Show file

File: EndOfRun_t.py Project: zhiwenuil/WMCore

class EndOfRunBasedTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45+i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45+i/3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)
            

        testWorkflow = Workflow()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")


        return

    def tearDown(self):
        pass

    def testNone(self):
        """
        _testNone_
        since the subscriptions are open, we shouldn't get any jobs back
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        return
    
    def testClosed(self):
        """
        _testClosed_
        since the subscriptions are closed and none of the files ahve been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."
        
        self.multipleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs),1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
        
        self.multipleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs),1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
        #self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory()
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
        
        
    def testAllAcquired(self):
        """
        _testAllAcquired_
        should all return no job groups
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.acquireFiles(
                           self.singleFileSubscription.availableFiles())
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        self.multipleFileSubscription.acquireFiles(
                           self.multipleFileSubscription.availableFiles())
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        self.multipleLumiSubscription.acquireFiles(
                           self.multipleLumiSubscription.availableFiles())
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.acquireFiles(
                           self.singleLumiSubscription.availableFiles())
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_
        since the subscriptions are closed and none of the files ahve been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.multipleFileSubscription.getFileset().markOpen(False)
        self.singleFileSubscription.acquireFiles(
                           [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        self.multipleFileSubscription.getFileset().markOpen(False)
        self.multipleFileSubscription.acquireFiles(
                           [self.multipleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        self.multipleLumiSubscription.getFileset().markOpen(False)
        self.multipleLumiSubscription.acquireFiles(
                           [self.multipleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        self.singleLumiSubscription.getFileset().markOpen(False)
        self.singleLumiSubscription.acquireFiles(
                           [self.singleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        self.assertEquals(len(myfiles), 9)

Example #54

0

Show file

    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Test that we can use a run white list to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Split with no breaks
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 4])

        # Re-split with a break on runs
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 3, 4])

        # Re-split with a break on files
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 2, 3])

Example #55

0

Show file

 def addFile(self, file):
     """
     Add the file object to the set, but don't commit to the database
     Call commit() to do that - enables bulk operations
     """
     WMFileset.addFile(self, file)

Example #56

0

Show file

File: EventAwareLumiByWork_t.py Project: alexanderrichards/WMCore

    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7, twoSites=False, nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # First test, the optimal settings are 360 events per job. As we have files with 0 events per lumi, this will
        # configure the splitting to a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=360,
                               performance=self.performanceParams)

        # One job in one job group with 100 files
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True, halt_job_on_file_boundaries=False, events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)

Example #57

0

Show file

File: EventAwareLumiBased_t.py Project: ticoann/WMCore

    def testD_NoFileSplitNoHardLimit(self):
        """
        _testD_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        #Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles = 100, lumisPerFile = 7, twoSites = False,
                                                   nEventsPerFile = 0)
        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)

        #First test, the optimal settings are 360 events per job
        #As we have files with 0 events per lumi, this will configure the splitting to
        #a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               events_per_job = 360)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100, "All 100 files must be in the job")

        #Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name = "FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 151, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "EventAwareLumiBased",
                                        type = "Processing")

        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        #Optimal settings are: jobs with 150 events per job
        #This means, the first file must be splitted in 3 lumis per job which would leave room
        #for another lumi in the second job, but the second file has a lumi too big for that
        #The 3rd job only contains the second file, the fourth and fifth job split the third file
        jobGroups = jobFactory(halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               events_per_job = 150)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 8, "Eight jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0L : [[0L, 2L]]}, "Wrong mask for the first job")
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0L : [[3L, 4L]]}, "Wrong mask for the second job")
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1L : [[1L, 1L]]}, "Wrong mask for the third job")
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2L : [[4L, 4L]]}, "Wrong mask for the fourth job")
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2L : [[5L, 5L]]}, "Wrong mask for the fifth job")
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(),
                         {3L : [[3L, 3L]], 4L : [[4L, 4L]], 5L : [[5L, 5L]]}, "Wrong mask for the sixth job")
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6L : [[18L, 19L]]}, "Wrong mask for the seventh job")
        self.assertEqual(jobs[7]["mask"].getRunAndLumis(), {6L : [[20L, 20L]]}, "Wrong mask for the seventh job")
        #Test interactions of this algorithm with splitOnRun = True
        #Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn = "/this/is/file1", size = 1000,
                       events = 2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name = 'FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "EventAwareLumiBased",
                                        type = "Processing")

        jobFactory = splitter(package = "WMCore.DataStructs",
                              subscription = testSubscription)
        #The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun = True,
                               halt_job_on_file_boundaries = False,
                               events_per_job = 700)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")

Example #58

0

Show file

File: RunBased.py Project: dmwm/WMCore-legacy

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_
        
        Implement run splitting algorithm. Assumes that if a file with a run is
        present, then all files for that run are also present.
        
        kwargs can take:
        files_per_job - e.g. 20 - Number of files per each split job
        """
        filesPerJob = kwargs.get("files_per_job", 300)
        requireRunClosed = kwargs.get("require_run_closed", False)

        #baseName = makeUUID()

        # Select all primary files for the first present run
        curRun = None
        primaryFiles = []
        #The current objective of this code is to find all runs in
        #a fileset, and then for each run, create a jobGroup
        #in each jobGroup have a list of jobs containing all the
        #files for that run.
        #If files have more then one run, sort that file with
        #the lowest run
        #In future, mask these files?

        runDict = {}

        locationDict = self.sortByLocation()

        for location in locationDict.keys():
            fileList = locationDict[location]
            for f in fileList:

                #If it is a WMBS object, load all data
                if hasattr(f, "loadData"):
                    f.loadData()

                #Die if there are no runs
                if len(f['runs']) < 1:
                    msg = "File %s claims to contain %s runs!" % (
                        f['lfn'], len(f['runs']))
                    raise RuntimeError, msg

                #First we need to pick the lowest run
                runList = []
                for r in f['runs']:
                    runList.append(r.run)

                run = min(runList)

                #If we don't have the run, we need to add it
                if not run in runDict.keys():
                    runDict[run] = []

                runDict[run].append(f)

            for run in runDict.keys():
                #Find the runs in the dictionary we assembled and split the files in them

                self.newGroup()
                baseName = makeUUID()

                #Now split them into sections according to files per job
                while len(runDict[run]) > 0:
                    jobFiles = Fileset()
                    for i in range(filesPerJob):
                        #Watch out if your last job has less then the full number of files
                        if len(runDict[run]) > 0:
                            jobFiles.addFile(runDict[run].pop())

                    # Create the job
                    currentJob = self.newJob(
                        '%s-%s' % (baseName, len(self.currentGroup.newjobs)),
                        files=jobFiles)

Example #59

0

Show file

class EventBasedTest(unittest.TestCase):
    """
    _EventBasedTest_

    Test event based job splitting.
    """
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('se01')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('se02')
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        newFile = File("/some/file/name", size=1000, events=0)
        newFile.setdefault('se03')
        self.emptyFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.emptyFileSubscription = Subscription(
            fileset=self.emptyFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to do...
        """
        pass

    def generateFakeMCFile(self,
                           numEvents=100,
                           firstEvent=1,
                           lastEvent=100,
                           firstLumi=1,
                           lastLumi=10):
        #MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name="MCTestFileset")
        newFile = File("MCFakeFileTest", size=1000, events=numEvents)
        newFile.setLocation('se01')
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        testWorkflow = Workflow()
        singleMCFileset.addFile(newFile)
        singleMCFileSubscription = Subscription(fileset=singleMCFileset,
                                                workflow=testWorkflow,
                                                split_algo="EventBased",
                                                type="Production")
        return singleMCFileSubscription

    def testNoEvents(self):
        """
        _testNoEvents_

        Test event based job splitting where there are no events in the
        input file, make sure the mask events are None
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.emptyFileSubscription)
        jobGroups = jobFactory(events_per_job=100)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                         "ERROR: Job contains unknown files")
        self.assertEqual(job["mask"].getMaxEvents(), None,
                         "ERROR: Mask maxEvents is not None")

    def testExactEvents(self):
        """
        _testExactEvents_

        Test event based job splitting when the number of events per job is
        exactly the same as the number of events in the input file.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(events_per_job=100)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        assert job["mask"].getMaxEvents() is None, \
               "ERROR: Job's max events is incorrect."

        assert job["mask"]["FirstEvent"] == 0, \
               "ERROR: Job's first event is incorrect."

        return

    def testMoreEvents(self):
        """
        _testMoreEvents_

        Test event based job splitting when the number of events per job is
        greater than the number of events in the input file.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)

        jobGroups = jobFactory(events_per_job=1000)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory created %s jobs not one" % len(jobGroups[0].jobs)

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        assert job["mask"].getMaxEvents() is None, \
               "ERROR: Job's max events is incorrect."

        assert job["mask"]["FirstEvent"] is None, \
               "ERROR: Job's first event is incorrect."

        return

    def test50EventSplit(self):
        """
        _test50EventSplit_

        Test event based job splitting when the number of events per job is
        50, this should result in two jobs.
        """

        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)

        jobGroups = jobFactory(events_per_job=50)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 2, \
               "ERROR: JobFactory created %s jobs not two" % len(jobGroups[0].jobs)

        firstEvents = []
        for job in jobGroups[0].jobs:
            assert job.getFiles(type = "lfn") == ["/some/file/name"], \
                   "ERROR: Job contains unknown files."

            assert job["mask"].getMaxEvents() == 50 or job["mask"].getMaxEvents() is None, \
                   "ERROR: Job's max events is incorrect."

            assert job["mask"]["FirstEvent"] in [0, 50], \
                   "ERROR: Job's first event is incorrect."

            assert job["mask"]["FirstEvent"] not in firstEvents, \
                   "ERROR: Job's first event is repeated."
            firstEvents.append(job["mask"]["FirstEvent"])

        return

    def test99EventSplit(self):
        """
        _test99EventSplit_

        Test event based job splitting when the number of events per job is
        99, this should result in two jobs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)

        jobGroups = jobFactory(events_per_job=99)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 2, \
               "ERROR: JobFactory created %s jobs not two" % len(jobGroups[0].jobs)

        firstEvents = []
        for job in jobGroups[0].jobs:
            assert job.getFiles(type = "lfn") == ["/some/file/name"], \
                   "ERROR: Job contains unknown files."

            self.assertTrue(
                job["mask"].getMaxEvents() == 99
                or job['mask'].getMaxEvents() is None,
                "ERROR: Job's max events is incorrect.")

            assert job["mask"]["FirstEvent"] in [0, 99], \
                   "ERROR: Job's first event is incorrect."

            assert job["mask"]["FirstEvent"] not in firstEvents, \
                   "ERROR: Job's first event is repeated."
            firstEvents.append(job["mask"]["FirstEvent"])

        return

    def test100EventMultipleFileSplit(self):
        """
        _test100EventMultipleFileSplit_

        Test job splitting into 100 event jobs when the input subscription has
        more than one file available.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(events_per_job=100)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 10, \
               "ERROR: JobFactory created %s jobs not ten" % len(jobGroups[0].jobs)

        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "lfn")) == 1, \
                   "ERROR: Job contains too many files."

            assert job["mask"].getMaxEvents() is None, \
                   "ERROR: Job's max events is incorrect."

            assert job["mask"]["FirstEvent"] == 0, \
                   "ERROR: Job's first event is incorrect."

        return

    def test50EventMultipleFileSplit(self):
        """
        _test50EventMultipleFileSplit_

        Test job splitting into 50 event jobs when the input subscription has
        more than one file available.
        """

        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(events_per_job=50)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 20, \
               "ERROR: JobFactory created %s jobs not twenty" % len(jobGroups[0].jobs)

        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "lfn")) == 1, \
                   "ERROR: Job contains too many files."

            assert job["mask"].getMaxEvents() == 50 or job["mask"].getMaxEvents() is None, \
                   "ERROR: Job's max events is incorrect."

            assert job["mask"]["FirstEvent"] in [0, 50], \
                   "ERROR: Job's first event is incorrect."

        return

    def test150EventMultipleFileSplit(self):
        """
        _test150EventMultipleFileSplit_

        Test job splitting into 150 event jobs when the input subscription has
        more than one file available.  This test verifies that the job splitting
        code will put at most one file in a job.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(events_per_job=150)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 10, \
               "ERROR: JobFactory created %s jobs not ten" % len(jobGroups[0].jobs)

        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "lfn")) == 1, \
                   "ERROR: Job contains too many files."

            assert job["mask"].getMaxEvents() is None, \
                   "ERROR: Job's max events is incorrect."

            assert job["mask"]["FirstEvent"] is None, \
                   "ERROR: Job's first event is incorrect."

    def testMCExactEvents(self):
        """
        _testMCExactEvents_
        Test event based job splitting when the number of events per job is
        exactly the same as the number of events in the input file and no lumi
        information was supplied.
        """
        singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=1)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=100)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job.getFiles(type="lfn"), ["MCFakeFileTest"],
                         "Error: Job contains unknown files.")

        self.assertEqual(
            job["mask"].getMaxEvents(), 100,
            "Error: Job's max events is incorrect.%i" %
            job["mask"].getMaxEvents())
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")
        self.assertEqual(len(job["mask"].getRunAndLumis()), 0,
                         "Error: Job's mask has runs and lumis")

    def testMCMoreEvents(self):
        """
        _testMCMoreEvents_

        Test event based job splitting when the number of events per job is
        greater than the number of events in the input file and no lumi
        information was supplied.
        """
        singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=1)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=1000)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job.getFiles(type="lfn"), ["MCFakeFileTest"],
                         "Error: Job contains unknown files.")

        self.assertEqual(job["mask"].getMaxEvents(), 100,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")
        self.assertEqual(len(job["mask"].getRunAndLumis()), 0,
                         "Error: Job's mask has runs and lumis")

    def testMC99EventSplit(self):
        """
        _testMC99EventSplit_

        Test event based job splitting when the number of events per job is
        99, this should result in two jobs.
        No lumi information is supplied here.
        """
        singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=2)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=99)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 99
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == 1)
            secondJobCondition = (job["mask"].getMaxEvents() == 1
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 100)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

    def testMC50EventSplit(self):
        """
        _testMC50EventSplit_

        Test event based job splitting when the number of events per job is
        50, this should result in two jobs.
        No lumi information supplied here.
        """
        singleMCSubscription = self.generateFakeMCFile(firstLumi=1, lastLumi=2)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=50)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 50
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == 1)
            secondJobCondition = (job["mask"].getMaxEvents() == 50
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 51)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])
        return

    def testMCShiftedEventSplit(self):
        """
        _testMCShiftedEventSplit_

        Performs different tests with files that start with event counters
        different than 1, lumi information remains default.
        """
        singleMCSubscription = self.generateFakeMCFile(numEvents=600,
                                                       firstEvent=201,
                                                       lastEvent=800)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=600)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job["mask"].getMaxEvents(), 600,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 201,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")

        singleMCSubscription = self.generateFakeMCFile(numEvents=600,
                                                       firstEvent=201,
                                                       lastEvent=800)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=6000)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job["mask"].getMaxEvents(), 600,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 201,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")

        singleMCSubscription = self.generateFakeMCFile(numEvents=600,
                                                       firstEvent=201,
                                                       lastEvent=800)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=599)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 599
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == 201)
            secondJobCondition = (job["mask"].getMaxEvents() == 1
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 800)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

        singleMCSubscription = self.generateFakeMCFile(numEvents=600,
                                                       firstEvent=201,
                                                       lastEvent=800)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=300)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 300
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == 201)
            secondJobCondition = (job["mask"].getMaxEvents() == 300
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 501)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

    def testMCShiftedLumiSplit(self):
        """
        _testMCShiftedLumiSplit

        Perform different tests with files that have lumi counters starting
        in something different than 1, however the splitting algorithm
        splits lumi with it's default value.
        """
        singleMCSubscription = self.generateFakeMCFile(firstLumi=345,
                                                       lastLumi=345)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=100)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job["mask"].getMaxEvents(), 100,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 345,
                         "Error: Job's first lumi is incorrect.")

        singleMCSubscription = self.generateFakeMCFile(firstLumi=345,
                                                       lastLumi=345)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=1000)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job["mask"].getMaxEvents(), 100,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 345,
                         "Error: Job's first lumi is incorrect.")

        singleMCSubscription = self.generateFakeMCFile(firstLumi=345,
                                                       lastLumi=345)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=99)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 99
                                 and job["mask"]["FirstLumi"] == 345
                                 and job["mask"]["FirstEvent"] == 1)
            secondJobCondition = (job["mask"].getMaxEvents() == 1
                                  and job["mask"]["FirstLumi"] == 346
                                  and job["mask"]["FirstEvent"] == 100)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

        singleMCSubscription = self.generateFakeMCFile(firstLumi=345,
                                                       lastLumi=345)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)
        jobGroups = jobFactory(events_per_job=50)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 50
                                 and job["mask"]["FirstLumi"] == 345
                                 and job["mask"]["FirstEvent"] == 1)
            secondJobCondition = (job["mask"].getMaxEvents() == 50
                                  and job["mask"]["FirstLumi"] == 346
                                  and job["mask"]["FirstEvent"] == 51)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

    def testMCLumiSplit(self):
        """
        _testMCLumiSplit_

        2 tests on lumi splitting are performed:
            1. The number of events per job is a multiple of the events
            per lumi
            2. The number of events per job is not a multiple of the events
            per lumi

        """
        singleMCSubscription = self.generateFakeMCFile(numEvents=150,
                                                       lastEvent=150,
                                                       lastLumi=15)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=100, events_per_lumi=10)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 50
                                 and job["mask"]["FirstLumi"] == 11
                                 and job["mask"]["FirstEvent"] == 101)
            secondJobCondition = (job["mask"].getMaxEvents() == 100
                                  and job["mask"]["FirstLumi"] == 1
                                  and job["mask"]["FirstEvent"] == 1)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

        singleMCSubscription = self.generateFakeMCFile(numEvents=150,
                                                       lastEvent=150,
                                                       lastLumi=15)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=111, events_per_lumi=10)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 39
                                 and job["mask"]["FirstLumi"] == 13
                                 and job["mask"]["FirstEvent"] == 112
                                 and job["mask"]["LastLumi"] == 17)
            secondJobCondition = (job["mask"].getMaxEvents() == 111
                                  and job["mask"]["FirstLumi"] == 1
                                  and job["mask"]["FirstEvent"] == 1)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

    def testMCEventSplitOver32bit(self):
        """
        _testMCEventSplitOver32bit_

        Make sure that no events will go over a 32 bit unsigned integer
        representation, event counter should be reset in that case.
        Also test is not over cautious.
        """
        firstEvent = 1
        singleMCSubscription = self.generateFakeMCFile(numEvents=2**32,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=2**32 - 1,
                               events_per_lumi=2**32 - 1)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 2**32 - 1
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == firstEvent
                                 and job["mask"]["LastEvent"] <= 2**32)
            secondJobCondition = (job["mask"].getMaxEvents() == 1
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 1)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])

        firstEvent = 1
        singleMCSubscription = self.generateFakeMCFile(numEvents=2**32 - 1,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=2**31, events_per_lumi=2**32)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))
        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 2**31
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == firstEvent
                                 and job["mask"]["LastEvent"] <= 2**32)
            secondJobCondition = (job["mask"].getMaxEvents() == 2**31 - 1
                                  and job["mask"]["FirstLumi"] == 2
                                  and job["mask"]["FirstEvent"] == 2**31 + 1
                                  and job["mask"]["LastEvent"] <= 2**32)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])
        firstEvent = 1
        singleMCSubscription = self.generateFakeMCFile(numEvents=2**32 - 1,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=2**32, events_per_lumi=2**32)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job["mask"].getMaxEvents(), 2**32 - 1,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")

        firstEvent = 2**32 - 1
        singleMCSubscription = self.generateFakeMCFile(numEvents=2,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=3, events_per_lumi=1)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job["mask"].getMaxEvents(), 2,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")

        firstEvent = 2**32
        singleMCSubscription = self.generateFakeMCFile(numEvents=50,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=60, events_per_lumi=10)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 1,
            "Error: JobFactory created %s jobs not one" %
            len(jobGroups[0].jobs))

        job = jobGroups[0].jobs.pop()

        self.assertEqual(job["mask"].getMaxEvents(), 50,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(job["mask"]["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(job["mask"]["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")

        firstEvent = 2**32
        singleMCSubscription = self.generateFakeMCFile(numEvents=50,
                                                       firstEvent=firstEvent)
        splitter = SplitterFactory()
        jobFactory = splitter(singleMCSubscription)

        jobGroups = jobFactory(events_per_job=30, events_per_lumi=10)
        self.assertEqual(len(jobGroups), 1,
                         "Error: JobFactory did not return one JobGroup")
        self.assertEqual(
            len(jobGroups[0].jobs), 2,
            "Error: JobFactory created %s jobs not two" %
            len(jobGroups[0].jobs))

        for job in jobGroups[0].jobs:

            firstJobCondition = (job["mask"].getMaxEvents() == 30
                                 and job["mask"]["FirstLumi"] == 1
                                 and job["mask"]["FirstEvent"] == 1
                                 and job["mask"]["LastEvent"] <= 2**32)
            secondJobCondition = (job["mask"].getMaxEvents() == 20
                                  and job["mask"]["FirstLumi"] == 4
                                  and job["mask"]["FirstEvent"] == 31)
            self.assertTrue(
                firstJobCondition or secondJobCondition,
                "Job mask: %s didn't pass neither of the conditions" %
                job["mask"])