Code Example #1
File: FixedDelay_t.py  Project: menglu21/WMCore
    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_
        Since the subscriptions are closed and some of the files have been
        acquired, only the files that have not been acquired should show up.
        """
        splitter = SplitterFactory()
        self.multipleFileSubscription.getFileset().markOpen(False)

        self.singleFileSubscription.acquireFiles(
                           [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")



        self.multipleFileSubscription.getFileset().markOpen(False)
        self.multipleFileSubscription.acquireFiles(
                           [self.multipleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(package = "WMCore.WMBS", subscription = self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.multipleLumiSubscription.getFileset().markOpen(False)
        self.multipleLumiSubscription.acquireFiles(
                           [self.multipleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.singleLumiSubscription.getFileset().markOpen(False)
        self.singleLumiSubscription.acquireFiles(
                           [self.singleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.assertEqual(len(myfiles), 9)
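
Every snippet on this page follows the same three-step pattern: build a SplitterFactory, call it with a subscription (and optionally a package) to get a JobFactory bound to the subscription's split_algo, then call the JobFactory with the algorithm's parameters to get a list of JobGroups. A minimal sketch of that pattern, assuming a working WMCore installation and an already-created subscription (both come from the test fixtures, which are not shown here):

from WMCore.JobSplitting.SplitterFactory import SplitterFactory

def split_subscription(subscription, package="WMCore.WMBS", **splitArgs):
    """Run the subscription's split algorithm and return its JobGroups."""
    splitter = SplitterFactory()                      # factory of splitting algorithms
    jobFactory = splitter(package=package,            # binds the algorithm named by the
                          subscription=subscription)  # subscription's split_algo
    return jobFactory(**splitArgs)                    # e.g. files_per_job=10, trigger_time=1

# Example, mirroring the FileBased tests further down:
#     jobGroups = split_subscription(sub, files_per_job=10)
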
Code Example #2
File: LumiBased_t.py  Project: ticoann/WMCore
    def testB_NoRunNoFileSplitting(self):
        """
        _NoRunNoFileSplitting_

        Test the splitting algorithm in the odder fringe
        cases that might be required.
        """
        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        jobGroups = jobFactory(lumis_per_job=3,
                               halt_job_on_file_boundaries=False,
                               splitOnRun=False)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 9)

        # The first job should have three lumis from one run
        # The second three lumis from two different runs
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0L: [[0L, 2L]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {
            0L: [[3L, 4L]],
            1L: [[100L, 100L]]
        })

        # And it should still be the same when you load it out of the database
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {
            0L: [[3L, 4L]],
            1L: [[100L, 100L]]
        })

        # Assert that this works differently with file splitting on and run splitting on
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=3,
                               halt_job_on_file_boundaries=True,
                               splitOnRun=True)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)

        # In this case it should slice things up so that each job only has one run
        # in it.
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0L: [[0L, 2L]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0L: [[3L, 4L]]})
        return
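
A quick sanity check of the two job counts asserted above, using only the numbers in the test (plain arithmetic, not WMCore code):

nFiles, lumisPerFile, lumis_per_job = 5, 5, 3
totalLumis = nFiles * lumisPerFile                      # 25 lumis in the subscription

# splitOnRun=False, halt_job_on_file_boundaries=False: lumis are packed greedily.
print(-(-totalLumis // lumis_per_job))                  # ceil(25 / 3) = 9 jobs

# splitOnRun=True, halt_job_on_file_boundaries=True: each 5-lumi file is chunked alone.
print(nFiles * -(-lumisPerFile // lumis_per_job))       # 5 * ceil(5 / 3) = 10 jobs
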
Code Example #3
    def execute(self, *args, **kwargs):
        wmwork = Workflow(name=kwargs['task']['tm_taskname'])

        wmsubs = Subscription(
            fileset=args[0],
            workflow=wmwork,
            split_algo=kwargs['task']['tm_split_algo'],
            type=self.jobtypeMapper[kwargs['task']['tm_job_type']])
        splitter = SplitterFactory()
        jobfactory = splitter(subscription=wmsubs)
        splitparam = kwargs['task']['tm_split_args']
        splitparam['algorithm'] = kwargs['task']['tm_split_algo']
        if kwargs['task']['tm_job_type'] == 'Analysis':
            if kwargs['task']['tm_split_algo'] == 'FileBased':
                splitparam['total_files'] = kwargs['task']['tm_totalunits']
            elif kwargs['task']['tm_split_algo'] == 'LumiBased':
                splitparam['total_lumis'] = kwargs['task']['tm_totalunits']
        elif kwargs['task']['tm_job_type'] == 'PrivateMC':
            if 'tm_events_per_lumi' in kwargs['task'] and kwargs['task']['tm_events_per_lumi']:
                splitparam['events_per_lumi'] = kwargs['task']['tm_events_per_lumi']
            if 'tm_generator' in kwargs['task'] and kwargs['task']['tm_generator'] == 'lhe':
                splitparam['lheInputFiles'] = True
        splitparam['applyLumiCorrection'] = True
        factory = jobfactory(**splitparam)
        if len(factory) == 0:
            # fill in the message placeholders (the dataset key may be absent, hence .get)
            raise TaskWorkerException("The CRAB3 server backend could not submit any job to the Grid scheduler:\n" +
                                      "splitting task %s on dataset %s with %s method does not generate any job" %
                                      (kwargs['task']['tm_taskname'],
                                       kwargs['task'].get('tm_input_dataset'),
                                       kwargs['task']['tm_split_algo']))
        #printing duplicated lumis if any
        lumiChecker = getattr(jobfactory, 'lumiChecker', None)
        if lumiChecker and lumiChecker.splitLumiFiles:
            self.logger.warning("The input dataset contains the following duplicated lumis %s"
                                % lumiChecker.splitLumiFiles.keys())
            try:
                configreq = {'subresource': 'addwarning',
                             'workflow': kwargs['task']['tm_taskname'],
                             'warning': b64encode('The CRAB3 server backend detected lumis split across files in the input dataset.'
                                                  ' Will apply the necessary corrections in the splitting algorithms')}
                self.server.post(self.restURInoAPI + '/task',
                                 data=urllib.urlencode(configreq))
            except Exception, e:
                self.logger.error(e.headers)
                self.logger.warning(
                    "Cannot add warning to REST after finding duplicates")
Code Example #4
    def testFileSplitting(self):
        """
        _testFileSplitting_

        Test that things work if we split files between jobs
        """
        splitter = SplitterFactory()

        oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=oneSetSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=True, events_per_job=100, performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        twoLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=2)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=twoLumiFiles)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, events_per_job=50, performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        wholeLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=3)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=wholeLumiFiles)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, events_per_job=67, performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        # 10 because we split on run boundaries
        self.assertEqual(len(jobGroups[0].jobs), 10)
        jobList = jobGroups[0].jobs
        for job in jobList:
            # Half of the jobs should have one file, half two
            self.assertTrue(len(job['input_files']) in [1, 2])

        jobLumiList = [jobList[i]['mask'].getRunAndLumis() for i in range(0, 10)]
        correctJobLumiList = [{0: [[0, 1]]}, {0: [[2, 2]]},
                              {1: [[3, 4]]}, {1: [[5, 5]]},
                              {4: [[12, 13]]}, {4: [[14, 14]]}
                             ]

        for lumiList in correctJobLumiList:
            self.assertIn(lumiList, jobLumiList)

        # Do it with multiple sites
        twoSiteSubscription = self.createSubscription(nFiles=5, lumisPerFile=2, twoSites=True)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=twoSiteSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True, events_per_job=50, performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 2)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)
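
A rough check of the whole-lumi case above, using only the numbers in the test (plain arithmetic, not WMCore code): with 100 events spread over 3 lumis, a 67-event budget fits two whole lumis, so each file yields one two-lumi job plus one single-lumi job.

import math

nFiles, events_per_file, lumis_per_file, events_per_job = 5, 100, 3, 67
events_per_lumi = events_per_file / float(lumis_per_file)                 # ~33.3 events
lumis_per_chunk = int(events_per_job // events_per_lumi)                  # 2 lumis fit per job
jobs_per_file = int(math.ceil(lumis_per_file / float(lumis_per_chunk)))   # 2 jobs per file
print(jobs_per_file * nFiles)                                             # 10 jobs in total
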
Code Example #5
    def testD_NonContinuousLumis(self):
        """
        _NonContinuousLumis_

        Test and see if LumiBased can work when the lumis are non-continuous
        """


        baseName = makeUUID()
        nFiles   = 10

        testFileset = Fileset(name = baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            # Set to two non-continuous lumi numbers
            lumis = [100 + i, 200 + i]
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()


        testSubscription  = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")
        testSubscription.create()

        splitter   = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = testSubscription)

        jobGroups = jobFactory(lumis_per_job = 2,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
        for j in jobs:
            runs =  j['mask'].getRunAndLumis()
            for r in runs.keys():
                self.assertEqual(len(runs[r]), 2)
                for l in runs[r]:
                    # Each run should have two lumis
                    # Each lumi should be of form [x, x]
                    # meaning that the first and last lumis are the same
                    self.assertEqual(len(l), 2)
                    self.assertEqual(l[0], l[1])

        return
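
The shape asserted above (two single-lumi ranges per run) follows from the two lumis never being adjacent. A small standalone illustration of how lumi numbers collapse into inclusive [first, last] ranges; this is a re-implementation for clarity, not WMCore's mask code:

def contiguous_ranges(lumis):
    """Group sorted lumi numbers into inclusive [first, last] ranges."""
    ranges = []
    for lumi in sorted(lumis):
        if ranges and lumi == ranges[-1][1] + 1:
            ranges[-1][1] = lumi            # extend the current range
        else:
            ranges.append([lumi, lumi])     # start a new range
    return ranges

# File i carries lumis 100+i and 200+i, which are never adjacent, so each
# run mask becomes two [x, x] ranges, matching the assertions above.
print(contiguous_ranges([103, 203]))        # [[103, 103], [203, 203]]
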
Code Example #6
    def testD_HardLimitSplittingOnly(self):
        """
        _testD_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileset.create()
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Settings are to halt jobs on file boundaries, to fail single lumis with more than
        # 800 events and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               max_events_per_lumi=800,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i in range(1, 4):
            self.assertTrue(
                jobs[i - 1]['failedOnCreation'],
                "The job processing the second file should me marked for failure"
            )
            self.assertEqual(
                jobs[i - 1]['failedReason'],
                "File /this/is/file%d has too many events (1000) in 1 lumi(s)"
                % i, "The reason for the failure is not accurate")

        return
Code Example #7
    def testHardLimitSplitting(self):
        """
        _testHardLimitSplitting_

        Test that we can specify an event limit: the algorithm shall take
        single-lumi files with more events than the limit and mark them for
        failure.
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Settings are to halt jobs on file boundaries, to put 550 events per job and to fail
        # single lumis that would exceed the 9600 second job time limit
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        # One job should be failed, the rest should be fine
        for jobNum in (0, 1, 3, 4):
            self.assertFalse(jobs[jobNum].get('failedOnCreation'))
        self.assertTrue(jobs[2]['failedOnCreation'])
        self.assertEqual(
            jobs[2]['failedReason'],
            'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run'
        )

        return
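
The failure reason quoted above implies roughly 12 seconds per event in the test's performanceParams (1000 events mapping to 12000 sec); that rate is inferred from the message, not a documented constant. The arithmetic behind marking the job as failed on creation:

events_in_lumi = 1000     # the single lumi of /this/is/file2
time_per_event = 12       # seconds, inferred from the 12000 sec quoted in the message
job_time_limit = 9600     # seconds, as passed to the job factory above

estimated_time = events_in_lumi * time_per_event
print(estimated_time, estimated_time > job_time_limit)   # 12000 True -> failedOnCreation
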
Code Example #8
File: Harvest_t.py  Project: tsarangi/WMCore
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        myThread = threading.currentThread()
        self.myThread = myThread
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
        self.WMBSFactory = daoFactory

        config = self.getConfig()
        self.changer = ChangeState(config)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE", "SomeCE")
        myResourceControl.insertSite("SomeSite", 10, 20, "SomeSE2", "SomeCE")
        myResourceControl.insertSite("SomeSite2", 10, 20, "SomeSE3", "SomeCE2")

        self.fileset1 = Fileset(name="TestFileset1")
        for file in range(11):
            newFile = File("/some/file/name%d" % file, size=1000, events=100)
            newFile.addRun(Run(1, *[1]))
            newFile.setLocation('SomeSE')
            self.fileset1.addFile(newFile)

        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Harvest",
                                          type="Harvesting")

        self.subscription1.create()
        self.configFile = EmulatorSetup.setupWMAgentConfig()

        return
Code Example #9
File: EventBased_t.py  Project: todor-ivanov/WMCore
    def testACDCProduction_v2(self):
        """
        _testACDCProduction_v2_

        Test the ability of the EventBased algorithm of creating
        jobs from ACDC correctly. Uses the new ACDC document version.
        """
        numFiles = 3
        lumisPerJob = 4
        eventsPerJob = 100
        numberOfJobs = 12  # 200
        self.populateACDCCouch(numFiles=numFiles, lumisPerJob=lumisPerJob, eventsPerJob=eventsPerJob,
                               numberOfJobs=numberOfJobs, acdcVer=2)

        mcSubscription = None
        for idx in range(3):
            mcSubscription = self.generateFakeMCFile(numEvents=eventsPerJob * numberOfJobs,
                                                     firstEvent=idx * eventsPerJob * numberOfJobs + 1,
                                                     lastEvent=(idx + 1) * eventsPerJob * numberOfJobs,
                                                     firstLumi=idx * lumisPerJob * numberOfJobs + 1,
                                                     lastLumi=(idx + 1) * lumisPerJob * numberOfJobs,
                                                     index=idx, existingSub=mcSubscription)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mcSubscription)

        jobGroups = jobFactory(events_per_job=eventsPerJob, events_per_lumi=int(eventsPerJob / lumisPerJob),
                               collectionName="ACDC_TestEventBased",
                               couchURL=self.couchUrl, couchDB=self.couchDBName,
                               filesetName="/ACDC_TestEventBased/Production",
                               performance=self.performanceParams)

        self.assertEqual(1, len(jobGroups))
        jobGroup = jobGroups[0]
        self.assertEqual(numFiles * numberOfJobs, len(jobGroup.jobs))

        for job in jobGroup.jobs:
            self.assertEqual(1, len(job["input_files"]))
            mask = job["mask"]
            self.assertEqual(mask.getMaxEvents(), eventsPerJob)
            self.assertEqual(mask.getMax("Event"), eventsPerJob)
            self.assertEqual(mask.getMax("Lumi"), lumisPerJob)
            self.assertEqual(mask.getMax("Run"), 1)
            self.assertEqual(mask["LastLumi"] - mask["FirstLumi"], lumisPerJob - 1)
            self.assertEqual(mask["LastEvent"] - mask["FirstEvent"], eventsPerJob - 1)
            self.assertEqual(mask["runAndLumis"], {})
            self.assertEqual(job["estimatedJobTime"], eventsPerJob * 12)
            self.assertEqual(job["estimatedDiskUsage"], eventsPerJob * 400)
            self.assertEqual(job["estimatedMemoryUsage"], 2300)

        return
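
The three fake MC files tile consecutive, non-overlapping event and lumi ranges, and each file carries enough events for numberOfJobs jobs, which is why numFiles * numberOfJobs = 36 jobs are expected. The ranges produced by the loop above:

eventsPerJob, lumisPerJob, numberOfJobs = 100, 4, 12
for idx in range(3):
    firstEvent = idx * eventsPerJob * numberOfJobs + 1
    lastEvent = (idx + 1) * eventsPerJob * numberOfJobs
    firstLumi = idx * lumisPerJob * numberOfJobs + 1
    lastLumi = (idx + 1) * lumisPerJob * numberOfJobs
    print(idx, (firstEvent, lastEvent), (firstLumi, lastLumi))
# 0 (1, 1200) (1, 48)
# 1 (1201, 2400) (49, 96)
# 2 (2401, 3600) (97, 144)
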
Code Example #10
File: WMBSMergeBySize_t.py  Project: ticoann/WMCore
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s2", seName="somese3.cern.ch")

        fileSite2 = File(lfn="fileSite2",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["somese3.cern.ch"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
               "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 2, \
               "ERROR: Two jobs should have been returned."

        for job in result[0].jobs:
            firstInputFile = job.getFiles()[0]
            baseLocation = list(firstInputFile["locations"])[0]

            for inputFile in job.getFiles():
                assert inputFile["locations"] == set(["somese.cern.ch", "somese2.cern.ch"]) or \
                       inputFile["locations"] == set(["somese3.cern.ch"]), \
                       "Error: Wrong number of locations"

                assert list(inputFile["locations"])[0] == baseLocation, \
                       "Error: Wrong location."

        return
Code Example #11
File: LumiBased_t.py  Project: vytjan/WMCore
    def testB_NoRunNoFileSplitting(self):
        """
        _NoRunNoFileSplitting_

        Test the splitting algorithm in the odder fringe
        cases that might be required.
        """
        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        jobGroups = jobFactory(lumis_per_job=3,
                               halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 9)

        # The first job should have three lumis from one run
        # The second three lumis from two different runs
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
        job1runLumi = jobs[1]['mask'].getRunAndLumis()
        self.assertEqual(job1runLumi[0][0][0] + 1,
                         job1runLumi[0][0][1])  # Run 0, startLumi+1 == endLumi
        self.assertEqual(job1runLumi[1][0][0],
                         job1runLumi[1][0][1])  # Run 1, startLumi == endLumi

        # Assert that this works differently with file splitting on and run splitting on
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=3,
                               halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)

        # In this case it should slice things up so that each job only has one run
        # in it.
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {0: [[0, 2]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {0: [[3, 4]]})
        return
Code Example #12
    def testLargeNumberOfFiles(self):
        """
        _testLargeNumberOfFiles_

        Setup a subscription with 500 files and verify that the splitting algo
        works correctly.
        """
        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve",
                                 name = "wfB", task = "Test")
        testWorkflowB.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size = 1000, events = 100,
                            locations = set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(fileset = testFileset,
                                         workflow = testWorkflowA,
                                         split_algo = "FileBased",
                                         type = "Processing")
        testSubscriptionA.create()
        testSubscriptionB = Subscription(fileset = testFileset,
                                         workflow = testWorkflowB,
                                         split_algo = "SiblingProcessingBased",
                                         type = "Processing")
        testSubscriptionB.create()

        testSubscriptionA.completeFiles(allFiles)

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionB)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
Code Example #13
    def testC_HardLimitSplitting(self):
        """
        _testC_HardLimitSplitting_

        Test that we can specify an event limit: the algorithm shall take
        single-lumi files with more events than the limit and mark them for
        failure.
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileset.create()
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Settings are to halt jobs on file boundaries, to fail single lumis with more than
        # 800 events and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               max_events_per_lumi=800,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        self.assertTrue(
            jobs[3]['failedOnCreation'],
            "The job processing the second file should me marked for failure")
        self.assertEqual(
            jobs[3]['failedReason'],
            "File /this/is/file2 has too many events (1000) in 1 lumi(s)",
            "The reason for the failure is not accurate")
Code Example #14
    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing on different SEs are not merged together in
        the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)

        return
Code Example #15
File: FixedDelay_t.py  Project: prozober/WMCore
    def testClosed(self):
        """
        _testClosed_

        Since the subscriptions are closed and none of the files have been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        self.multipleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)

        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)

        self.multipleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEquals(len(jobGroups), 1)
        self.assertEquals(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
        #self.assertEquals(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 10)
Code Example #16
    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file0", 1000, 0, 1,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file1", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file2", 1000, 2, 1,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Put 550 events per job and fail single lumis that would exceed the 9600 second job time limit
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        for i in range(3):
            num = list(jobs[i]['mask']['runAndLumis'])[0]
            self.assertTrue(jobs[i]['failedOnCreation'])
            error = 'File /this/is/file%s has a single lumi %s, in run %s' % (
                num, num, num)
            error += ' with too many events 1000 and it woud take 12000 sec to run'
            self.assertEqual(jobs[i]['failedReason'], error)

        return
Code Example #17
File: ChangeState_t.py  Project: samircury/WMCore
    def testUpdateFailedDoc(self):
        """
        _testUpdateFailedDoc_

        Verify that the update function will work correctly and not throw a 500
        error if the doc didn't make it into the database for some reason.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", seName="somese.cern.ch")

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        testFileA = File(lfn="SomeLFNA",
                         events=1024,
                         size=2048,
                         locations=set(["somese.cern.ch"]))
        testFileA.create()
        testFileset.addFile(testFileA)
        testFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Merge"
        testJobA["couch_record"] = str(testJobA["id"])

        change.propagate([testJobA], "new", "none")
        testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

        self.assertTrue(testJobADoc.has_key("states"))
        self.assertTrue(testJobADoc["states"].has_key("1"))
        return
Code Example #18
File: EventBased_t.py  Project: todor-ivanov/WMCore
    def testACDCNonSequential_v2(self):
        """
        _testACDCNonSequential_v2_
        Test the ability of the EventBased algorithm to create the proper jobs
        given job information from the ACDCServer using non-sequential and irregular
        (diff number of lumis per job) lumi distribution (new version of ACDC docs)
        """
        eventsPerJob = 700
        eventsPerLumi = 200
        lumisPerJob = 4

        self.populateACDCFakeFile(acdcVer=2)

        mcSubscription = self.generateFakeMCFile(3500, 1, 0, 1, 1000, 0)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mcSubscription)

        jobGroups = jobFactory(events_per_job=eventsPerJob, events_per_lumi=eventsPerLumi,
                               collectionName="ACDC_TestEventBased",
                               couchURL=self.couchUrl, couchDB=self.couchDBName,
                               filesetName="/ACDC_TestEventBased/Production",
                               performance=self.performanceParams)

        self.assertEqual(1, len(jobGroups))
        jobGroup = jobGroups[0]
        self.assertEqual(5, len(jobGroup.jobs))

        for jobNum, lRange in enumerate([[197, 198], [277, 281], [337, 338], [421, 422], [529, 530]]):
            job = jobGroup.jobs[jobNum]
            self.assertEqual(1, len(job["input_files"]))
            mask = job["mask"]
            self.assertEqual(mask.getMaxEvents(), eventsPerJob)
            self.assertEqual(mask.getMax("Event"), eventsPerJob)
            if lRange[0] == 277:
                self.assertEqual(mask.getMax("Lumi"), lumisPerJob)
            else:
                self.assertEqual(mask.getMax("Lumi"), 1)
            self.assertEqual(mask.getMax("Run"), 1)
            self.assertEqual(mask["FirstLumi"], lRange[0])
            self.assertEqual(mask["LastLumi"], lRange[1] - 1)
            self.assertEqual(mask["LastEvent"] - mask["FirstEvent"], eventsPerJob - 1)
            self.assertEqual(mask["runAndLumis"], {})
            self.assertEqual(job["estimatedJobTime"], eventsPerJob * 12)
            self.assertEqual(job["estimatedDiskUsage"], eventsPerJob * 400)
            self.assertEqual(job["estimatedMemoryUsage"], 2300)

        return
Code Example #19
File: MinFileBased_t.py  Project: vytjan/WMCore
    def testB_LessFilesOpen(self):
        """
        _LessFilesOpen_

        Test with fewer files than required.
        If the fileset is open, this should produce no jobs.
        """

        sub = self.createTestSubscription(nFiles=5)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=sub)

        jobGroups = jobFactory(files_per_job=10)

        self.assertEqual(len(jobGroups), 0)
        return
Code Example #20
File: SizeBased_t.py  Project: vytjan/WMCore
    def testFiles500(self):
        """
        _testFiles500_

        Tests the mechanism for splitting up multiple files into jobs with
        a variety of different arguments.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)

        #Test it with something too small to handle; should return one job per file
        jobGroups = jobFactory(size_per_job=500)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)

        return
Code Example #21
    def testMultipleSites(self):
        """
        _testMultipleSites_

        Tests how to break up files at different locations
        """

        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleSiteSubscription)

        jobGroups = jobFactory(size_per_job=1000)

        self.assertEqual(len(jobGroups), 2)
        self.assertEqual(len(jobGroups[0].jobs), 5)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job.getFiles()), 1)
Code Example #22
    def testE_HardLimitSplitting(self):
        """
        _testE_HardLimitSplitting_

        Test that we can specify an event limit: the algorithm shall take
        single-lumi files with more events than the limit and mark them for
        failure.
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to halt jobs on file boundaries, to put 550 events per job and to fail
        # single lumis that would exceed the 9600 second job time limit
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        failedJobs = [
            job for job in jobs if job.get('failedOnCreation', False)
        ]
        self.assertEqual(len(failedJobs), 1)
        self.assertEqual(
            failedJobs[0]['failedReason'],
            'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run'
        )

        return
Code Example #23
    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1,
                                    "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1,
                                    "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Put 550 events per job and fail single lumis that would exceed the 9600 second job time limit
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        for job in jobs:
            self.assertTrue(job['failedOnCreation'])
            self.assertIn(
                ' with too many events 1000 and it woud take 12000 sec to run',
                job['failedReason'])

        return
Code Example #24
    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where files exist only in the delete subscription,
        which can happen if cleanup of the other subscriptions is fast.

        """
        testWorkflowA = Workflow(spec="specA.xml",
                                 owner="Steve",
                                 name="wfA",
                                 task="Test")
        testWorkflowA.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i),
                            size=1000,
                            events=100,
                            locations=set(["T2_CH_CERN"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(fileset=testFileset,
                                         workflow=testWorkflowA,
                                         split_algo="SiblingProcessingBased",
                                         type="Processing")
        testSubscriptionA.create()

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS",
                                  subscription=testSubscriptionA)

        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
コード例 #25
0
ファイル: FileBased_t.py プロジェクト: lucacopa/WMCore
    def testRespectRunBoundaries(self):
        """
        _testRespectRunBoundaries_

        Test whether or not this thing will respect run boundaries
        """

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job = 10, respect_run_boundaries = True,
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)

        return
Code Example #26
File: MinFileBased_t.py  Project: vytjan/WMCore
    def testA_ExactFiles(self):
        """
        _testExactFiles_

        Test file based job splitting when the number of files per job is
        exactly the same as the number of files in the input fileset.
        """
        nFiles = 5
        sub = self.createTestSubscription(nFiles=nFiles)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=sub)

        jobGroups = jobFactory(files_per_job=nFiles)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        self.assertEqual(len(jobGroups[0].jobs[0]['input_files']), nFiles)
        return
Code Example #27
File: MinFileBased_t.py  Project: vytjan/WMCore
    def testC_LessFilesClosed(self):
        """
        _LessFilesClosed_

        Test with fewer files than required.
        If the fileset is closed, this should produce one job.
        """

        sub = self.createTestSubscription(nFiles=5, closeFileset=True)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=sub)

        jobGroups = jobFactory(files_per_job=10)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        self.assertEqual(len(jobGroups[0].jobs[0]['input_files']), 5)
        return
Code Example #28
    def testI_DisableHardLimitSplitting(self):
        """
        _testI_DisableHardLimitSplitting_

        Test that we can bypass the job time limit when allowCreationFailure is
        set to False. The algorithm shall take single lumi files with time per
        lumi greater than the job time limit but not mark them for failure
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to halt jobs on file boundaries and put 550 events per job, with a 9600
        # second job time limit but with creation failures disabled (allowCreationFailure=False)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               allowCreationFailure=False,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        failedJobs = [
            job for job in jobs if job.get('failedOnCreation', False)
        ]
        self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")

        return
Code Example #29
    def testMultipleFiles2000(self):
        """
        _testMultipleFiles2000_

        Tests the mechanism for splitting up multiple files into jobs with
        a variety of different arguments.
        """

        splitter = SplitterFactory()
        jobFactory = splitter(self.multipleFileSubscription)
        #Test it with two files per job
        jobGroups = jobFactory(size_per_job=2000)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 5)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job.getFiles()), 2)

        return
Code Example #30
File: MinFileBased_t.py  Project: vytjan/WMCore
    def testD_MoreFilesOpen(self):
        """
        _MoreFilesOpen_

        If you pass it more files than files_per_job, it should produce
        jobs until it hits the limit, then stop.
        """

        sub = self.createTestSubscription(nFiles=10)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=sub)

        jobGroups = jobFactory(files_per_job=3)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 3)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 3)
        return
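
For the open-fileset case above, only complete bundles of files_per_job become jobs and the leftover file is held back until more input arrives; that reading follows from the open/closed fileset tests earlier on this page, not from the algorithm's source. The arithmetic:

nFiles, files_per_job = 10, 3
print(nFiles // files_per_job)   # 3 jobs of 3 files each; 1 file left waiting
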