Example #1
0
    def testMoreEvents(self):
        """
        _testMoreEvents_

        Split the single-file subscription asking for 1000 events per job.
        Exactly one job group holding exactly one job is expected; that job
        must carry the file "/some/file/name" and a mask whose max events
        and first event are both None.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.singleFileSubscription)

        groups = splitJobs(events_per_job = 1000,
                           performance = self.performanceParams)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        createdJobs = groups[0].jobs
        assert len(createdJobs) == 1, \
               "ERROR: JobFactory created %s jobs not one" % len(createdJobs)

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()

        assert onlyJob.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        mask = onlyJob["mask"]
        assert mask.getMaxEvents() is None, \
               "ERROR: Job's max events is incorrect."

        assert mask["FirstEvent"] is None, \
               "ERROR: Job's first event is incorrect."
    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Build a fileset of three files through createFile and split it with
        the EventAwareLumiByWork algorithm using events_per_job=550 and
        job_time_limit=9600. One job group with three jobs is expected, and
        every job must be flagged as failed on creation with a failure
        reason reporting 1000 events and a 12000 second run time.
        """
        factory = SplitterFactory()

        # Three files, each intended to hold one lumi too large to run
        fileset = Fileset(name="FilesetA")
        oversizedFiles = [
            self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch"),
            self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch"),
            self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch"),
        ]
        for oversizedFile in oversizedFiles:
            fileset.addFile(oversizedFile)

        subscription = Subscription(fileset=fileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
        splitJobs = factory(package="WMCore.DataStructs",
                            subscription=subscription)

        groups = splitJobs(halt_job_on_file_boundaries=True,
                           splitOnRun=True,
                           events_per_job=550,
                           job_time_limit=9600,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        failedJobs = groups[0].jobs
        self.assertEqual(len(failedJobs), 3)

        # The text must match the splitter's own message, spelling included
        expectedReason = ' with too many events 1000 and it woud take 12000 sec to run'
        for failedJob in failedJobs:
            self.assertTrue(failedJob['failedOnCreation'])
            self.assertIn(expectedReason, failedJob['failedReason'])
Example #3
0
    def testAllAcquired(self):
        """
        _testAllAcquired_

        For each of the four fixture subscriptions, mark every available
        file as acquired and then run the splitter with trigger_time=1.
        The job factory must return an empty list every time.
        """
        factory = SplitterFactory()

        def checkNoJobGroups(subscription):
            # Acquire whatever is currently available, then try to split
            subscription.acquireFiles(subscription.availableFiles())
            splitJobs = factory(subscription)
            groups = splitJobs(trigger_time = 1)
            self.assertEqual(groups, [], "Should have returned a null set")

        checkNoJobGroups(self.singleFileSubscription)
        checkNoJobGroups(self.multipleFileSubscription)
        checkNoJobGroups(self.multipleLumiSubscription)
        checkNoJobGroups(self.singleLumiSubscription)
Example #4
0
    def __init__(self, **configDict):
        """
        Set up the job creator from keyword configuration.

        Required keys in configDict (a missing one raises KeyError):
        couchURL, defaultRetries, couchDBName, jobCacheDir and
        defaultJobType. Optional key: fileLoadLimit (defaults to 500).

        The current thread object must already carry `transaction` and
        `dbi` attributes; both are read here.
        """

        # current_thread() is the supported spelling; the camelCase
        # currentThread() alias is deprecated in recent Python versions.
        myThread = threading.current_thread()

        self.transaction = myThread.transaction

        # DAO factory for WMBS objects
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=logging,
                                     dbinterface=myThread.dbi)

        # WMCore splitter factory for splitting up jobs.
        self.splitterFactory = SplitterFactory()

        # Minimal configuration holding only the JobStateMachine section,
        # which is handed to ChangeState below
        config = Configuration()
        config.section_("JobStateMachine")
        config.JobStateMachine.couchurl = configDict["couchURL"]
        config.JobStateMachine.couch_retries = configDict["defaultRetries"]
        config.JobStateMachine.couchDBName = configDict["couchDBName"]

        self.config = config

        # Variables
        self.jobCacheDir = configDict['jobCacheDir']
        self.defaultJobType = configDict['defaultJobType']
        self.limit = configDict.get('fileLoadLimit', 500)

        self.createWorkArea = CreateWorkArea()

        self.changeState = ChangeState(self.config)

        return
Example #5
0
    def testE_getParents(self):
        """
        _getParents_

        Split a ten-file, one-lumi-per-file subscription with
        include_parents=True and check the parent bookkeeping. One job
        group with ten jobs is expected; each job must have exactly one
        input file, and that file must have exactly one parent whose LFN
        shares the child's prefix (the text before the first underscore).
        """

        splitter = SplitterFactory()

        oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=oneSetSubscription)

        jobGroups = jobFactory(lumis_per_job=3,
                               split_files_between_job=True,
                               include_parents=True,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            # assertEqual, not assertTrue: assertTrue(x, 1) takes 1 as the
            # failure message and passes for any non-empty file list.
            self.assertEqual(len(job['input_files']), 1)
            f = job['input_files'][0]
            self.assertEqual(len(f['parents']), 1)
            self.assertEqual(f['lfn'].split('_')[0],
                             list(f['parents'])[0]['lfn'].split('_')[0])

        return
Example #6
0
    def testMoreEvents(self):
        """
        _testMoreEvents_

        Split the single-file WMBS subscription asking for 1000 events per
        job. One job group with one job is expected; the job must reference
        "/some/file/name" and its mask must have neither max events nor a
        first event set (both compare equal to None).
        """
        factory = SplitterFactory()
        splitJobs = factory(package = "WMCore.WMBS",
                            subscription = self.singleFileSubscription)

        groups = splitJobs(events_per_job = 1000)
        self.assertEqual(len(groups), 1)

        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 1)

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()
        self.assertEqual(onlyJob.getFiles(type = "lfn"), ["/some/file/name"])

        mask = onlyJob["mask"]
        self.assertEqual(mask.getMaxEvents(), None)
        self.assertEqual(mask["FirstEvent"], None)
Example #7
0
    def testMCEventSplitOver32bit(self):
        """
        _testMCEventSplitOver32bit_

        Generate a fake MC file of 2**30 events whose first event is
        3*(2**30) + 1 and split it with 2**30 - 1 events per job and per
        lumi. Two jobs are expected and each must match one of two shapes:
        either max events 2**30 - 1, first lumi 1, the configured first
        event and a last event no larger than 2**32, or max events 1, first
        lumi 2 and first event 1 (the event counter restarted).
        """
        startEvent = 3*(2**30) + 1
        subscription = self.generateFakeMCFile(numEvents = 2**30,
                                               firstEvent = startEvent)
        factory = SplitterFactory()
        splitJobs = factory(package = "WMCore.WMBS",
                            subscription = subscription)

        groups = splitJobs(events_per_job = 2**30 - 1,
                           events_per_lumi = 2**30 - 1)
        self.assertEqual(len(groups), 1,
                         "Error: JobFactory did not return one JobGroup")

        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 2,
                         "Error: JobFactory created %s jobs not two"
                         % len(createdJobs))

        for createdJob in createdJobs:
            mask = createdJob["mask"]

            # Shape of the job that stays below the 32 bit boundary
            matchesFirst = (mask.getMaxEvents() == 2**30 - 1 and
                            mask["FirstLumi"] == 1 and
                            mask["FirstEvent"] == startEvent and
                            mask["LastEvent"] <= 2**32)
            # Shape of the job whose event counter restarted at 1
            matchesSecond = (mask.getMaxEvents() == 1 and
                             mask["FirstLumi"] == 2 and
                             mask["FirstEvent"] == 1)
            self.assertTrue(matchesFirst or matchesSecond,
                            "Job mask: %s didn't pass neither of the conditions"
                            % mask)
Example #8
0
    def testSiteBlacklist(self):
        """
        _testSiteBlacklist_

        Split the multiple-site subscription with files_per_job=10 while
        blacklisting T2_CH_CERN. Two job groups are expected, and the first
        job of each group must list only T1_US_FNAL as a possible site and
        carry the expected memory, disk and time estimates.
        """
        factory = SplitterFactory()
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=self.multipleSiteSubscription)

        groups = splitJobs(files_per_job=10,
                           siteBlacklist=["T2_CH_CERN"],
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 2)
        self.assertEqual(len(groups[0].jobs), 1)

        # First job of the first group
        firstJob = groups[0].jobs[0]
        self.assertEqual(firstJob["estimatedMemoryUsage"], 2300)
        self.assertEqual(firstJob["estimatedDiskUsage"], 100 * 400 * 5)
        self.assertEqual(firstJob["estimatedJobTime"], 100 * 12 * 5)
        self.assertEqual(firstJob["possiblePSN"], set(["T1_US_FNAL"]))

        # First job of the second group
        secondJob = groups[1].jobs[0]
        self.assertEqual(len(secondJob.getFiles()), 5)
        self.assertEqual(secondJob["estimatedMemoryUsage"], 2300)
        self.assertEqual(secondJob["estimatedDiskUsage"], 100 * 400 * 5)
        self.assertEqual(secondJob["estimatedJobTime"], 100 * 12 * 5)
        self.assertEqual(secondJob["possiblePSN"], set(["T1_US_FNAL"]))
Example #9
0
    def testTrustSiteLists(self):
        """
        _testTrustSiteLists_

        Split the multiple-site subscription with trustSitelists=True plus
        a site whitelist and blacklist. A single job holding all ten files
        is expected, and its possible sites must be the whitelisted sites
        that are not blacklisted (T2_AA_AAA and T2_CC_CCC).
        """
        factory = SplitterFactory()
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=self.multipleSiteSubscription)

        allowedSites = ["T2_AA_AAA", "T2_BB_BBB", "T2_CC_CCC", "T2_DD_DDD"]
        bannedSites = ["T2_BB_BBB", "T2_DD_DDD", "T2_EE_EEE"]
        groups = splitJobs(files_per_job=10,
                           siteWhitelist=allowedSites,
                           siteBlacklist=bannedSites,
                           trustSitelists=True,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        self.assertEqual(len(groups[0].jobs), 1)

        onlyJob = groups[0].jobs[0]
        self.assertEqual(len(onlyJob.getFiles()), 10)
        self.assertEqual(onlyJob["estimatedMemoryUsage"], 2300)
        self.assertEqual(onlyJob["estimatedDiskUsage"], 100 * 400 * 10)
        self.assertEqual(onlyJob["estimatedJobTime"], 100 * 12 * 10)
        self.assertEqual(onlyJob["possiblePSN"],
                         set(["T2_AA_AAA", "T2_CC_CCC"]))
Example #10
0
    def testMC50EventSplit(self):
        """
        _testMC50EventSplit_

        Split a fake MC file (firstLumi=1, lastLumi=2) into jobs of 50
        events. Two jobs are expected: one with first lumi 1 starting at
        event 1 and one with first lumi 2 starting at event 51, both capped
        at 50 events. Neither job's baggage may have lheInputFiles set.
        """
        subscription = self.generateFakeMCFile(firstLumi = 1,
                                               lastLumi = 2)
        factory = SplitterFactory()
        splitJobs = factory(subscription)

        groups = splitJobs(events_per_job = 50,
                           performance = self.performanceParams)
        self.assertEqual(len(groups), 1,
                         "Error: JobFactory did not return one JobGroup")

        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 2,
                         "Error: JobFactory created %s jobs not two"
                         % len(createdJobs))

        for createdJob in createdJobs:
            mask = createdJob["mask"]

            matchesFirst = (mask.getMaxEvents() == 50 and
                            mask["FirstLumi"] == 1 and
                            mask["FirstEvent"] == 1)
            matchesSecond = (mask.getMaxEvents() == 50 and
                             mask["FirstLumi"] == 2 and
                             mask["FirstEvent"] == 51)
            self.assertTrue(matchesFirst or matchesSecond,
                            "Job mask: %s didn't pass neither of the conditions"
                            % mask)
            self.assertFalse(createdJob.getBaggage().lheInputFiles)
Example #11
0
    def testLocationSplit(self):
        """
        _testLocationSplit_

        Split the multiple-site subscription with files_per_job=10 and
        check that files are grouped by location: two job groups are
        expected, the first job of the first group being runnable at
        T2_CH_CERN or T1_US_FNAL, the first job of the second group only at
        T1_US_FNAL.
        """
        factory = SplitterFactory()
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=self.multipleSiteSubscription)

        groups = splitJobs(files_per_job=10,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 2)
        self.assertEqual(len(groups[0].jobs), 1)

        # First job of the first group
        firstJob = groups[0].jobs[0]
        self.assertEqual(firstJob["estimatedMemoryUsage"], 2300)
        self.assertEqual(firstJob["estimatedDiskUsage"], 100 * 400 * 5)
        self.assertEqual(firstJob["estimatedJobTime"], 100 * 12 * 5)
        self.assertEqual(firstJob["possiblePSN"],
                         set(["T2_CH_CERN", "T1_US_FNAL"]))

        # First job of the second group
        secondJob = groups[1].jobs[0]
        self.assertEqual(len(secondJob.getFiles()), 5)
        self.assertEqual(secondJob["estimatedMemoryUsage"], 2300)
        self.assertEqual(secondJob["estimatedDiskUsage"], 100 * 400 * 5)
        self.assertEqual(secondJob["estimatedJobTime"], 100 * 12 * 5)
        self.assertEqual(secondJob["possiblePSN"], set(["T1_US_FNAL"]))
Example #12
0
    def testMCMoreEvents(self):
        """
        _testMCMoreEvents_

        Split a fake MC file (firstLumi=1, lastLumi=1) asking for 1000
        events per job. A single job is expected, referencing
        "MCFakeFileTest", with a mask of 100 max events, first event 1,
        first lumi 1 and no run/lumi entries.
        """
        subscription = self.generateFakeMCFile(firstLumi = 1,
                                               lastLumi = 1)
        factory = SplitterFactory()
        splitJobs = factory(subscription)

        groups = splitJobs(events_per_job = 1000,
                           performance = self.performanceParams)
        self.assertEqual(len(groups), 1,
                         "Error: JobFactory did not return one JobGroup")

        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 1,
                         "Error: JobFactory created %s jobs not one"
                         % len(createdJobs))

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()

        self.assertEqual(onlyJob.getFiles(type = "lfn"), ["MCFakeFileTest"],
                         "Error: Job contains unknown files.")

        mask = onlyJob["mask"]
        self.assertEqual(mask.getMaxEvents(), 100,
                         "Error: Job's max events is incorrect.")
        self.assertEqual(mask["FirstEvent"], 1,
                         "Error: Job's first event is incorrect.")
        self.assertEqual(mask["FirstLumi"], 1,
                         "Error: Job's first lumi is incorrect.")
        self.assertEqual(len(mask.getRunAndLumis()), 0,
                         "Error: Job's mask has runs and lumis")
Example #13
0
    def test150EventMultipleFileSplit(self):
        """
        _test150EventMultipleFileSplit_

        Split the multiple-file subscription asking for 150 events per job.
        Ten jobs in a single group are expected, each holding exactly one
        file and a mask with neither max events nor a first event set.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.multipleFileSubscription)

        groups = splitJobs(events_per_job = 150,
                           performance = self.performanceParams)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        createdJobs = groups[0].jobs
        assert len(createdJobs) == 10, \
               "ERROR: JobFactory created %s jobs not ten" % len(createdJobs)

        for createdJob in createdJobs:
            assert len(createdJob.getFiles(type = "lfn")) == 1, \
                   "ERROR: Job contains too many files."

            mask = createdJob["mask"]
            assert mask.getMaxEvents() is None, \
                   "ERROR: Job's max events is incorrect."

            assert mask["FirstEvent"] is None, \
                   "ERROR: Job's first event is incorrect."
Example #14
0
    def test99EventSplit(self):
        """
        _test99EventSplit_

        Split the single-file subscription asking for 99 events per job.
        Two jobs are expected, both on "/some/file/name"; each mask must
        have max events of 99 or None, and the first events must be 0 and
        99 with no value repeated.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.singleFileSubscription)

        groups = splitJobs(events_per_job = 99,
                           performance = self.performanceParams)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        createdJobs = groups[0].jobs
        assert len(createdJobs) == 2, \
               "ERROR: JobFactory created %s jobs not two" % len(createdJobs)

        # First events already encountered, used to detect duplicates
        seenFirstEvents = []
        for createdJob in createdJobs:
            assert createdJob.getFiles(type = "lfn") == ["/some/file/name"], \
                   "ERROR: Job contains unknown files."

            mask = createdJob["mask"]
            maxEvents = mask.getMaxEvents()
            self.assertTrue(maxEvents == 99 or maxEvents is None,
                            "ERROR: Job's max events is incorrect.")

            firstEvent = mask["FirstEvent"]
            assert firstEvent in [0, 99], \
                   "ERROR: Job's first event is incorrect."

            assert firstEvent not in seenFirstEvents, \
                   "ERROR: Job's first event is repeated."
            seenFirstEvents.append(firstEvent)
Example #15
0
    def testMultipleRunsCombine(self):
        """
        _testMultipleRunsCombine_

        Split the multiple-run subscription with files_per_job=2. Four job
        groups are expected; the second group must hold two jobs, the last
        of which carries one file and the one before it two files.
        """

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleRunSubscription)

        jobGroups = jobFactory(files_per_job=2)

        # unittest assertions instead of bare asserts: they still run under
        # "python -O" and report both compared values on failure.
        self.assertEqual(len(jobGroups), 4,
                         "ERROR: JobFactory didn't return one JobGroup per run.")

        self.assertEqual(len(jobGroups[1].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs in the second JobGroup.")

        # pop() takes jobs from the end of the list: the last job must hold
        # one file and the job before it two (presumably three files per
        # run - confirm against the fixture).
        self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 1)
        self.assertEqual(len(jobGroups[1].jobs.pop().getFiles(type="lfn")), 2)

        return
Example #16
0
    def test_getParents(self):
        """
        _getParents_

        Split the multiple-file subscription two files per job with
        include_parents=True. Five jobs of two files each are expected (ten
        LFNs in total), and every input file must have exactly one parent
        with LFN '/parent/lfn/'.
        """
        factory = SplitterFactory()
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=self.multipleFileSubscription)

        groups = splitJobs(files_per_job=2,
                           include_parents=True,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 5)

        # Collect every LFN handed out across the jobs
        collectedLfns = []
        for createdJob in createdJobs:
            self.assertEqual(len(createdJob.getFiles()), 2)
            collectedLfns.extend(createdJob.getFiles(type="lfn"))
        self.assertEqual(len(collectedLfns), 10)

        # Every input file carries exactly the expected parent
        for createdJob in createdJobs:
            for inputFile in createdJob['input_files']:
                parents = inputFile['parents']
                self.assertEqual(len(parents), 1)
                self.assertEqual(list(parents)[0]['lfn'], '/parent/lfn/')
Example #17
0
    def testExactEvents(self):
        """
        _testExactEvents_

        Split the single-file WMBS subscription asking for 100 events per
        job, which is meant to equal the event count of the input file. One
        job on "/some/file/name" is expected, with no max events in its
        mask and a first event of 0.
        """
        factory = SplitterFactory()
        splitJobs = factory(package = "WMCore.WMBS",
                            subscription = self.singleFileSubscription)

        groups = splitJobs(events_per_job = 100)
        self.assertEqual(len(groups), 1)

        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 1)

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()
        self.assertEqual(onlyJob.getFiles(type = "lfn"),  ["/some/file/name"])

        mask = onlyJob["mask"]
        self.assertEqual(mask.getMaxEvents(), None)
        self.assertEqual(mask["FirstEvent"], 0)
Example #18
0
    def testMoreFiles(self):
        """
        _testMoreFiles_

        Split the single-file subscription asking for ten files per job.
        A single job group with a single job on "/some/file/name" is
        expected.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.singleFileSubscription)

        groups = splitJobs(files_per_job=10,
                           performance=self.performanceParams)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        createdJobs = groups[0].jobs
        assert len(createdJobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()

        assert onlyJob.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."
Example #19
0
    def test100EventMultipleSite(self):
        """
        _test100EventMultipleSite_

        Split the multiple-site WMBS subscription asking for 100 events per
        job. Two job groups of five jobs each are expected. Every job of
        the first group must hold one file, have no max events in its mask
        and start at event 0.
        """
        factory = SplitterFactory()
        splitJobs = factory(package = "WMCore.WMBS",
                            subscription = self.multipleSiteSubscription)

        groups = splitJobs(events_per_job = 100)

        self.assertEqual(len(groups), 2)
        self.assertEqual(len(groups[0].jobs), 5)
        self.assertEqual(len(groups[1].jobs), 5)

        # Only the first group's jobs are inspected in detail
        for createdJob in groups[0].jobs:
            self.assertEqual(len(createdJob.getFiles(type = "lfn")), 1)

            mask = createdJob["mask"]
            self.assertEqual(mask.getMaxEvents(), None)

            assert mask["FirstEvent"] == 0, \
                   "ERROR: Job's first event is incorrect."
Example #20
0
    def test2FileSplit(self):
        """
        _test2FileSplit_

        Test file based job splitting when the number of files per job is
        2, this should result in five jobs. Every job must hold two files
        and the jobs together must cover ten distinct LFNs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=2,
                               performance=self.performanceParams)

        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        # The message now names the count that is actually checked (five)
        assert len(jobGroups[0].jobs) == 5, \
               "ERROR: JobFactory didn't create five jobs."

        # A set, so an LFN handed to two jobs is only counted once
        fileSet = set()
        for job in jobGroups[0].jobs:
            assert len(job.getFiles(type = "set")) == 2, \
                   "ERROR: Job contains incorrect number of files."

            # "lfn" rather than "file", which shadows a Python 2 builtin
            for lfn in job.getFiles(type="lfn"):
                fileSet.add(lfn)

        assert len(fileSet) == 10, \
               "ERROR: Not all files assigned to job."

        return
Example #21
0
    def test_addParents(self):
        """
        _addParents_

        Split the multiple-file WMBS subscription into 50 event jobs with
        include_parents=True. Twenty single-file jobs are expected; each
        mask is either (max events 50, first event 0) or (no max events,
        first event 50), and every input file must have exactly one parent
        with LFN '/parent/lfn/'.
        """

        factory = SplitterFactory()
        splitJobs = factory(package = "WMCore.WMBS",
                            subscription = self.multipleFileSubscription)

        groups = splitJobs(events_per_job = 50, include_parents = True)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 20)

        for createdJob in createdJobs:
            self.assertEqual(len(createdJob.getFiles(type = "lfn")), 1)

            mask = createdJob["mask"]
            self.assertTrue(
                (mask.getMaxEvents() == 50 and mask["FirstEvent"] == 0) or
                (mask.getMaxEvents() is None and mask["FirstEvent"] == 50))

            for inputFile in createdJob['input_files']:
                parents = inputFile['parents']
                self.assertEqual(len(parents), 1)
                self.assertEqual(list(parents)[0]['lfn'], '/parent/lfn/')
Example #22
0
    def test3FileSplit(self):
        """
        _test3FileSplit_

        Split the multiple-file subscription three files per job. Four jobs
        are expected, each holding three files or one file, with no LFN
        appearing twice and ten LFNs in total.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.multipleFileSubscription)

        groups = splitJobs(files_per_job=3,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 4)

        # LFNs seen so far, used to detect a file given to two jobs
        seenLfns = []
        for createdJob in createdJobs:
            assert len(createdJob.getFiles(type = "list")) in [3, 1], \
                   "ERROR: Job contains incorrect number of files."

            for lfn in createdJob.getFiles(type="lfn"):
                assert lfn not in seenLfns, \
                       "ERROR: File duplicated!"
                seenLfns.append(lfn)

        self.assertEqual(len(seenLfns), 10)
Example #23
0
    def testExactEvents(self):
        """
        _testExactEvents_

        Split the single-file subscription with size_per_job=1000. A single
        job group with a single job on "/some/file/name" is expected.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.singleFileSubscription)

        groups = splitJobs(size_per_job = 1000)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        createdJobs = groups[0].jobs
        assert len(createdJobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        # pop() removes the job from the group's job list
        onlyJob = createdJobs.pop()

        assert onlyJob.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."
Example #24
0
    def test4WithLumiMask(self):
        """
        _test4WithLumiMask_

        Split the multiple-file subscription two files per job while
        passing total_files=3 together with runs/lumis mask arguments. Two
        jobs are expected, holding two files or one file each, with no LFN
        repeated and three LFNs in total.
        """
        factory = SplitterFactory()
        splitJobs = factory(self.multipleFileSubscription)

        maskRuns = ['1', '2', '4', '5']
        maskLumis = ['100,130', '203,204,207,221', '401,405', '500, 520']
        groups = splitJobs(files_per_job=2,
                           total_files=3,
                           runs=maskRuns,
                           lumis=maskLumis,
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 2)

        # LFNs seen so far, used to detect a file given to two jobs
        seenLfns = []
        for createdJob in createdJobs:
            assert len(createdJob.getFiles(type = "list")) in [2, 1], \
                   "ERROR: Job contains incorrect number of files."

            for lfn in createdJob.getFiles(type="lfn"):
                assert lfn not in seenLfns, \
                       "ERROR: File duplicated!"
                seenLfns.append(lfn)

        self.assertEqual(len(seenLfns), 3)
Example #25
0
    def testF_RunWhitelist(self):
        """
        _runWhitelist_

        Split a ten-file, one-lumi-per-file subscription with
        lumis_per_job=10 and runWhitelist=[1]. A single job is expected,
        with one input file whose only run is run 1.
        """

        factory = SplitterFactory()

        subscription = self.createSubscription(nFiles=10, lumisPerFile=1)
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=subscription)

        groups = splitJobs(lumis_per_job=10,
                           split_files_between_job=True,
                           runWhitelist=[1],
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        self.assertEqual(len(groups[0].jobs), 1)

        inputFiles = groups[0].jobs[0]['input_files']
        self.assertEqual(len(inputFiles), 1)

        runs = inputFiles[0]['runs']
        self.assertEqual(len(runs), 1)
        self.assertEqual(runs[0].run, 1)
Example #26
0
    def testFilesetCloseout2(self):
        """
        _testFilesetCloseout2_

        Populate WMBS through stuffWMBS, mark the merge fileset as not
        open and run the merge splitter. One job group with two jobs is
        expected, and the Subscriptions.GetFailedFiles DAO must then report
        four failed files for the merge subscription. Intended to show that
        the orphan-file failure code leaves alone files that failed for
        other workflows.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        factory = SplitterFactory()
        splitJobs = factory(package="WMCore.WMBS",
                            subscription=self.mergeSubscription)

        # Run the splitter first so the good merge jobs are created
        groups = splitJobs(min_merge_size=1,
                           max_merge_size=999999999999,
                           max_merge_events=999999999)

        self.assertEqual(len(groups), 1, "Error: Wrong number of job groups.")
        self.assertEqual(len(groups[0].jobs), 2,
                         "Error: Wrong number of jobs.")

        getFailedFiles = self.daoFactory(
            classname="Subscriptions.GetFailedFiles")
        failedFiles = getFailedFiles.execute(self.mergeSubscription["id"])

        self.assertEqual(
            len(failedFiles), 4,
            "Error: Wrong number of failed files: %s" % failedFiles)
    def testLumiMaskAndWhitelist(self):
        """
        _testLumiMaskAndWhitelist_

        Combine a lumi mask (the runs/lumis arguments) with a run whitelist
        and check that the single resulting job's mask covers only
        {1: [[10, 14]], 4: [[40, 41]]}.
        """
        factory = SplitterFactory()

        # Three input files, intended as 100 events per lumi:
        #   file1: run 1 with lumis 10-17
        #   file2: run 2 with lumis 20-21 and run 3 with lumis 30-31
        #   file3: run 4 with lumis 40-44
        firstFile = File(lfn="/this/is/file1", size=1000, events=800)
        secondFile = File(lfn="/this/is/file2", size=1000, events=400)
        thirdFile = File(lfn="/this/is/file3", size=1000, events=500)

        firstFile.addRun(Run(1, *range(10, 18)))
        firstFile.setLocation("somese.cern.ch")

        secondFile.addRun(Run(2, *range(20, 22)))
        secondFile.addRun(Run(3, *range(30, 32)))
        secondFile.setLocation("somese.cern.ch")

        thirdFile.addRun(Run(4, *range(40, 45)))
        thirdFile.setLocation("somese.cern.ch")

        fileset = Fileset(name='Fileset')
        for inputFile in (firstFile, secondFile, thirdFile):
            fileset.addFile(inputFile)

        subscription = Subscription(fileset=fileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
        splitJobs = factory(package="WMCore.DataStructs",
                            subscription=subscription)

        # Lumi mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]};
        # the whitelist then keeps only runs 1 and 4
        groups = splitJobs(halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           events_per_job=850,
                           runs=['1', '2', '4'],
                           lumis=['10,14', '20,21', '40,41'],
                           runWhitelist=[1, 4],
                           performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 1)

        expectedMask = {1: [[10, 14]], 4: [[40, 41]]}
        self.assertEqual(createdJobs[0]['mask'].getRunAndLumis(), expectedMask)
Example #28
0
    def testMoreRuns(self):
        """
        _testMoreRuns_

        Split the single-file subscription with files_per_job=2 and check
        that exactly one job group holding exactly one job comes back, and
        that the job carries the expected input file.
        """
        factory = SplitterFactory()(package="WMCore.WMBS",
                                    subscription=self.singleFileSubscription)
        groups = factory(files_per_job=2)

        assert len(groups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        groupJobs = groups[0].jobs
        assert len(groupJobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        # Take the only job out of the group to inspect its input files.
        onlyJob = groupJobs.pop()
        lfns = onlyJob.getFiles(type = "lfn")

        assert lfns == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        return
Example #29
0
    def test2FileSplit(self):
        """
        _test2FileSplit_

        Split the multiple-file subscription two files at a time. One job
        group with five jobs is expected, each job holding two files and
        carrying the resource estimates checked below; ten LFNs are
        collected overall.
        """
        factory = SplitterFactory()(package="WMCore.WMBS",
                                    subscription=self.multipleFileSubscription)
        groups = factory(files_per_job=2,
                         performance=self.performanceParams)

        self.assertEqual(len(groups), 1)
        createdJobs = groups[0].jobs
        self.assertEqual(len(createdJobs), 5)

        collectedLfns = []
        for createdJob in createdJobs:
            self.assertEqual(len(createdJob.getFiles()), 2)
            collectedLfns.extend(createdJob.getFiles(type="lfn"))
            # Expected per-job estimates for a two-file job.
            self.assertEqual(createdJob["estimatedMemoryUsage"], 2300)
            self.assertEqual(createdJob["estimatedDiskUsage"], 400 * 100 * 2)
            self.assertEqual(createdJob["estimatedJobTime"], 12 * 100 * 2)

        self.assertEqual(len(collectedLfns), 10)

        return
Example #30
0
    def test100EventMultipleSite(self):
        """
        _test100EventMultipleSite_

        Split the multiple-site subscription by self.eventsPerJob events.
        Two job groups of five jobs each are expected: the first with
        possible PSNs {'s1', 's2'}, the second with {'s1'}. Every job in
        the first group must hold a single file, start at event 0 and
        carry the resource estimates checked below.
        """
        factory = SplitterFactory()(package="WMCore.WMBS",
                                    subscription=self.multipleSiteSubscription)
        groups = factory(events_per_job=self.eventsPerJob,
                         performance=self.performanceParams)

        self.assertEqual(len(groups), 2)
        firstGroup, secondGroup = groups[0], groups[1]

        self.assertEqual(len(firstGroup.jobs), 5)
        self.assertEqual(len(secondGroup.jobs), 5)
        self.assertEqual(firstGroup.jobs[0]['possiblePSN'], {'s1', 's2'})
        self.assertEqual(secondGroup.jobs[0]['possiblePSN'], {'s1'})

        for groupJob in firstGroup.jobs:
            self.assertEqual(len(groupJob.getFiles(type="lfn")), 1)

            jobMask = groupJob["mask"]
            self.assertEqual(jobMask.getMaxEvents(), self.eventsPerJob)
            self.assertEqual(jobMask["FirstEvent"], 0)

            self.assertEqual(groupJob["estimatedJobTime"], 100 * 12)
            self.assertEqual(groupJob["estimatedDiskUsage"], 400 * 100)
            self.assertEqual(groupJob["estimatedMemoryUsage"], 2300)

        return