Esempio n. 1
0
    def __init__(self,
                 fileset=None,
                 workflow=None,
                 id=-1,
                 split_algo="FileBased",
                 type="Processing"):
        """
        Initialise the subscription on top of WMBSBase and WMSubscription.

        fileset    -- fileset to subscribe to; when None a new Fileset() is
                      created here.
        workflow   -- workflow attached to the subscription; when None a new
                      Workflow() is created here.
        id         -- value stored under the "id" key if that key has not
                      been set already.
        split_algo -- splitting algorithm name passed on to WMSubscription.
        type       -- subscription type passed on to WMSubscription.
        """
        WMBSBase.__init__(self)

        # If a fileset or workflow isn't passed in the base class will create
        # empty non-WMBS filesets and workflows.  We want WMBS filesets and
        # workflows so we'll create those here.  The check is an identity
        # test so that a custom __eq__ on the argument is never consulted.
        if fileset is None:
            fileset = Fileset()
        if workflow is None:
            workflow = Workflow()

        WMSubscription.__init__(self,
                                fileset=fileset,
                                workflow=workflow,
                                split_algo=split_algo,
                                type=type)

        # setdefault keeps an "id" that the base initialisation may have set.
        self.setdefault("id", id)

        # presumably the batch size for bulk deletions -- TODO confirm usage
        self.bulkDeleteLimit = 500
        return
Esempio n. 2
0
    def setUp(self):
        """
        _setUp_

        Prepare the fixtures: one subscription over a fileset holding ten
        files (each located at two sites) and one subscription over a fileset
        holding a single file.  Both subscriptions share one default workflow
        and use the FileBased splitting algorithm.
        """
        manyFiles = self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            twoSiteFile = File(makeUUID(), size=1000, events=100)
            for site in ('blenheim', 'malpaquet'):
                twoSiteFile.setLocation(site)
            manyFiles.addFile(twoSiteFile)

        oneFile = self.singleFileFileset = Fileset(name="TestFileset2")
        loneFile = File("/some/file/name", size=1000, events=100)
        loneFile.setLocation('blenheim')
        oneFile.addFile(loneFile)

        # Settings common to both subscriptions
        shared = dict(workflow=Workflow(),
                      split_algo="FileBased",
                      type="Processing")
        self.multipleFileSubscription = Subscription(fileset=manyFiles,
                                                     **shared)
        self.singleFileSubscription = Subscription(fileset=oneFile, **shared)
Esempio n. 3
0
    def setUp(self):
        """
        _setUp_

        Prepare three subscriptions that share one default workflow:
          - multipleFileSubscription: ten files at one site (SizeBased)
          - singleFileSubscription: one file at one site (SizeBased)
          - multipleSiteSubscription: five files at one site followed by
            five files at two sites (EventBased)
        """
        mainSite = "somese.cern.ch"

        manyFiles = self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            manyFiles.addFile(File(makeUUID(),
                                   size=1000,
                                   events=100,
                                   locations=set([mainSite])))

        oneFile = self.singleFileFileset = Fileset(name="TestFileset2")
        oneFile.addFile(File("/some/file/name",
                             size=1000,
                             events=100,
                             locations=set([mainSite])))

        manySites = self.multipleSiteFileset = Fileset(name="TestFileset3")
        # First half: files known at the main site only
        for _ in range(5):
            localFile = File(makeUUID(),
                             size=1000,
                             events=100,
                             locations=set([mainSite]))
            localFile.setLocation(mainSite)
            manySites.addFile(localFile)
        # Second half: files known at both sites
        for _ in range(5):
            sharedFile = File(makeUUID(), size=1000, events=100)
            sharedFile.setLocation([mainSite, "otherse.cern.ch"])
            manySites.addFile(sharedFile)

        workflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset=manyFiles,
                                                     workflow=workflow,
                                                     split_algo="SizeBased",
                                                     type="Processing")
        self.singleFileSubscription = Subscription(fileset=oneFile,
                                                   workflow=workflow,
                                                   split_algo="SizeBased",
                                                   type="Processing")
        self.multipleSiteSubscription = Subscription(fileset=manySites,
                                                     workflow=workflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
Esempio n. 4
0
    def setUp(self):
        """
        _setUp_

        Create four subscriptions, all using the RunBased splitting algorithm
        and sharing one default workflow:
          - multipleFileSubscription: ten files, file i holding run i
          - singleFileSubscription: a single file holding run 1
          - multipleRunSubscription: ten files spread over runs 0 to 3,
            three consecutive files per run
          - singleRunSubscription: ten files all holding run 1
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, 45 + i))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, 45))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileRunset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            # Floor division keeps the run number an integer regardless of
            # whether "/" means classic or true division in the interpreter.
            newFile.addRun(Run(i // 3, 45))
            self.multipleFileRunset.addFile(newFile)

        self.singleRunFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, 45))
            self.singleRunFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "RunBased",
                                                     type = "Processing")
        self.singleFileSubscription   = Subscription(fileset = self.singleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "RunBased",
                                                     type = "Processing")
        self.multipleRunSubscription  = Subscription(fileset = self.multipleFileRunset,
                                                     workflow = testWorkflow,
                                                     split_algo = "RunBased",
                                                     type = "Processing")
        self.singleRunSubscription    = Subscription(fileset = self.singleRunFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "RunBased",
                                                     type = "Processing")

        return
Esempio n. 5
0
    def testProductionRunNumber(self):
        """
        _testProductionRunNumber_

        Jobs created from a "Production" subscription must carry run number 1
        as both FirstRun and LastRun in their job mask, whereas jobs created
        from a "Processing" subscription must leave both entries as None.
        """
        workflow = Workflow(spec="spec.pkl",
                            owner="Steve",
                            name="TestWorkflow",
                            task="TestTask")

        fileset = Fileset(name="TestFileset")
        fileset.addFile(File(lfn="someLFN"))
        fileset.commit()

        # Production subscription: run numbers are forced to 1
        productionSub = Subscription(fileset=fileset,
                                     workflow=workflow,
                                     split_algo="FileBased",
                                     type="Production")
        productionGroups = JobFactory(subscription=productionSub)()

        self.assertTrue(len(productionGroups) > 0)
        for group in productionGroups:
            self.assertTrue(len(group.jobs) > 0)
            for productionJob in group.jobs:
                mask = productionJob["mask"]
                self.assertEqual(mask["FirstRun"], 1,
                                 "Error: First run is wrong.")
                self.assertEqual(mask["LastRun"], 1,
                                 "Error: Last run is wrong.")

        # Processing subscription: run numbers stay untouched
        processingSub = Subscription(fileset=fileset,
                                     workflow=workflow,
                                     split_algo="FileBased",
                                     type="Processing")
        processingGroups = JobFactory(subscription=processingSub)()

        for group in processingGroups:
            for processingJob in group.jobs:
                mask = processingJob["mask"]
                self.assertEqual(mask["FirstRun"], None,
                                 "Error: First run is wrong.")
                self.assertEqual(mask["LastRun"], None,
                                 "Error: Last run is wrong.")
Esempio n. 6
0
    def setUp(self):
        """
        _setUp_

        Create four subscriptions, all using the EndOfRun splitting algorithm
        and sharing one default workflow:
          - multipleFileSubscription: ten files, file i holding run i with
            lumi 45 + i
          - singleFileSubscription: a single file holding run 1, lumi 45
          - multipleLumiSubscription: ten files in run 1 spread over lumis
            45 to 48, three consecutive files per lumi
          - singleLumiSubscription: ten files all holding run 1, lumi 45
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, 45 + i))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, 45))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            # Floor division keeps the lumi number an integer regardless of
            # whether "/" means classic or true division in the interpreter.
            newFile.addRun(Run(1, 45 + i // 3))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, 45))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")

        return
Esempio n. 7
0
    def setUp(self):
        """
        _setUp_

        Prepare three EventBased subscriptions sharing one default workflow:
        one over ten files, one over a single file and one over a single
        file that contains zero events.  Also stores the events-per-job
        value and the performance parameters on the test instance.
        """
        manyFiles = self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            populated = File(makeUUID(), size=1000, events=100)
            populated.setLocation('se01')
            manyFiles.addFile(populated)

        oneFile = self.singleFileFileset = Fileset(name="TestFileset2")
        lone = File("/some/file/name", size=1000, events=100)
        lone.setLocation('se02')
        oneFile.addFile(lone)

        noEvents = self.emptyFileFileset = Fileset(name="TestFileset3")
        hollow = File("/some/file/name", size=1000, events=0)
        hollow.setLocation('se03')
        noEvents.addFile(hollow)

        # Settings common to all three subscriptions
        shared = dict(workflow=Workflow(),
                      split_algo="EventBased",
                      type="Processing")
        self.multipleFileSubscription = Subscription(fileset=manyFiles,
                                                     **shared)
        self.singleFileSubscription = Subscription(fileset=oneFile, **shared)
        self.emptyFileSubscription = Subscription(fileset=noEvents, **shared)

        self.eventsPerJob = 100
        self.performanceParams = dict([('timePerEvent', None),
                                       ('memoryRequirement', 2300),
                                       ('sizePerEvent', 400)])
Esempio n. 8
0
    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job.  Each of the three resulting
        jobs must be flagged as failed on creation, cover exactly one run and
        one lumi, and carry the expected failure reason.
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 1,
                                    "somese.cern.ch")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "somese.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 1,
                                    "somese.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Settings are to split on file boundaries, to fail single lumis that
        # exceed the job time limit and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")
        for i, job in enumerate(jobs):
            self.assertTrue(job['failedOnCreation'],
                            "It should have been marked as failed")

            runAndLumis = job['mask']['runAndLumis']

            # keys()/values() are materialised as lists because dictionary
            # views cannot be indexed.
            runNums = list(runAndLumis.keys())
            self.assertEqual(len(runNums), 1)

            lumiNums = list(runAndLumis.values())[0]
            self.assertEqual(len(lumiNums), 1)

            # Expand the [first, last] lumi pairs into individual lumis
            finalLumi = []
            for pair in lumiNums:
                finalLumi.extend(range(pair[0], pair[1] + 1))
            self.assertEqual(len(finalLumi), 1)

            self.assertEqual(
                job['failedReason'],
                "File /this/is/file%d has a single lumi %s, in run %s with too many events 1000 and it woud take 12000 sec to run"
                % (i + 1, finalLumi[0], runNums[0]))

        return
    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Split three files that each hold one lumi too large for a runnable
        job and verify that all three resulting jobs are marked as failed on
        creation with the expected failure reason.
        """
        splitter = SplitterFactory()

        # Build the three single-big-lumi files first, then register them
        bigLumiFiles = [
            self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch"),
            self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch"),
            self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch"),
        ]
        testFileset = Fileset(name="FilesetA")
        for bigLumiFile in bigLumiFiles:
            testFileset.addFile(bigLumiFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Fail single lumis with more than 800 events and put 550 events per job
        splittingArgs = dict(halt_job_on_file_boundaries=True,
                             splitOnRun=True,
                             events_per_job=550,
                             job_time_limit=9600,
                             performance=self.performanceParams)
        jobGroups = jobFactory(**splittingArgs)

        self.assertEqual(len(jobGroups), 1)
        failedJobs = jobGroups[0].jobs
        self.assertEqual(len(failedJobs), 3)

        expectedReason = ' with too many events 1000 and it woud take 12000 sec to run'
        for failedJob in failedJobs:
            self.assertTrue(failedJob['failedOnCreation'])
            self.assertIn(expectedReason, failedJob['failedReason'])
Esempio n. 10
0
    def testG_LumiMask(self):
        """
        _testG_LumiMask_

        Verify that a lumi-mask passed to the job factory restricts the jobs
        to the selected runs and lumis.
        """
        splitter = SplitterFactory()

        # Three files with 100 events per lumi:
        # - file1: run 1 with lumis 10-17
        # - file2: run 2 with lumis 20-21 and run 3 with lumis 30-31
        # - file3: run 4 with lumis 40-44
        site = "somese.cern.ch"
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        fileA.addRun(Run(1, *range(10, 18)))
        fileA.setLocation(site)

        fileB.addRun(Run(2, *range(20, 22)))
        fileB.addRun(Run(3, *range(30, 32)))
        fileB.setLocation(site)

        fileC.addRun(Run(4, *range(40, 45)))
        fileC.setLocation(site)

        testFileset = Fileset(name='Fileset')
        for maskedFile in (fileA, fileB, fileC):
            testFileset.addFile(maskedFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")

        firstMask = jobs[0]['mask'].getRunAndLumis()
        self.assertEqual(firstMask, {1: [[10, 14]], 2: [[20, 21]], 4: [[40, 40]]})
        secondMask = jobs[1]['mask'].getRunAndLumis()
        self.assertEqual(secondMask, {4: [[41, 41]]})
Esempio n. 11
0
    def execute(self, *args, **kwargs):
        """
        Split the fileset given as args[0] into jobs for kwargs['task'].

        A Subscription is built from the task name, splitting algorithm and
        job type, and the matching job factory is called with the task's
        splitting arguments (the algorithm name is stored into them under
        'algorithm').  When no job comes out, the task is reported as FAILED
        through the REST interface and StopHandler is raised; otherwise a
        Result wrapping the task and the factory output is returned.
        """
        task = kwargs['task']

        wmwork = Workflow(name=task['tm_taskname'])
        wmsubs = Subscription(fileset=args[0],
                              workflow=wmwork,
                              split_algo=task['tm_split_algo'],
                              type=self.jobtypeMapper[task['tm_job_type']])
        jobfactory = SplitterFactory()(subscription=wmsubs)

        # The task's own argument dictionary is updated in place
        splitparam = task['tm_split_args']
        splitparam['algorithm'] = task['tm_split_algo']
        factory = jobfactory(**splitparam)

        if len(factory) != 0:
            return Result(task=task, result=factory)

        # No job could be created from the splitting arguments and the input
        # dataset information: the task is marked as failed.
        msg = "Splitting %s on %s with %s does not generate any job" % (
            task['tm_taskname'],
            task['tm_input_dataset'],
            task['tm_split_algo'])
        self.logger.error("Setting %s as failed" % str(task['tm_taskname']))
        configreq = {
            'workflow': task['tm_taskname'],
            'status': "FAILED",
            'subresource': 'failure',
            'failure': b64encode(msg),
        }
        self.server.post(self.resturl, data=urllib.urlencode(configreq))
        raise StopHandler(msg)
Esempio n. 12
0
    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           nEventsPerFile=100):
        """
        _createSubscription_

        Return an EventAwareLumiBased "Processing" subscription over a new
        fileset named after a fresh UUID.  The fileset receives nFiles files
        built with self.createFile for site 'blenheim' and, when twoSites is
        true, nFiles more for site 'malpaquet'.
        """
        baseName = makeUUID()

        # (LFN pattern, site) for every batch of files to create, in order
        batches = [('%s_%i', 'blenheim')]
        if twoSites:
            batches.append(('%s_%i_2', 'malpaquet'))

        testFileset = Fileset(name=baseName)
        for lfnPattern, site in batches:
            for index in range(nFiles):
                lfn = lfnPattern % (baseName, index)
                testFileset.addFile(
                    self.createFile(lfn, nEventsPerFile, index, lumisPerFile,
                                    site))

        return Subscription(fileset=testFileset,
                            workflow=self.testWorkflow,
                            split_algo="EventAwareLumiBased",
                            type="Processing")
Esempio n. 13
0
    def generateFakeMCFile(self,
                           numEvents=100,
                           firstEvent=1,
                           lastEvent=100,
                           firstLumi=1,
                           lastLumi=10,
                           existingSub=None):
        """
        Build the fake MC input file and return a subscription containing it.

        The file is named "MCFakeFileTest", located at 'se01' and holds run 1.
        Its lumis are range(firstLumi, lastLumi), except that a request with
        firstLumi == lastLumi yields that single lumi.  When existingSub is
        given the file is added to its fileset and existingSub is returned;
        otherwise a new EventBased "Production" subscription is created.
        """
        # MC comes with only one MCFakeFile
        mcFile = File("MCFakeFileTest", size=1000, events=numEvents)
        mcFile.setLocation('se01')

        # The upper bound is exclusive unless a single lumi was requested
        stopLumi = lastLumi + 1 if firstLumi == lastLumi else lastLumi
        mcFile.addRun(Run(1, *range(firstLumi, stopLumi)))

        mcFile["first_event"] = firstEvent
        mcFile["last_event"] = lastEvent

        if existingSub is not None:
            existingSub['fileset'].addFile(mcFile)
            return existingSub

        mcFileset = Fileset(name="MCTestFileset")
        mcFileset.addFile(mcFile)
        mcWorkflow = Workflow()
        return Subscription(fileset=mcFileset,
                            workflow=mcWorkflow,
                            split_algo="EventBased",
                            type="Production")
Esempio n. 14
0
    def setUp(self):
        """
        _setUp_

        Create two FileBased subscriptions sharing one default workflow: one
        over ten files (file i holds run i with twenty consecutive lumis
        starting at i * 100) and one over a single file holding run 13 with
        lumis 50-59 and 70-79.  Also stores the performance parameters on
        the test instance.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            # Build the complete lumi list first and attach the run once;
            # adding the run on every iteration re-registered the same run
            # twenty times with a growing lumi list.
            lumis = [(i * 100) + lumi for lumi in range(20)]
            newFile.addRun(Run(i, *lumis))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('blenheim')
        lumis = list(range(50, 60)) + list(range(70, 80))
        newFile.addRun(Run(13, *lumis))
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }

        return
Esempio n. 15
0
    def setUp(self):
        """
        Initial setup for the Subscription testcase.

        Builds a dummy file, a set containing it, a fileset created from
        that set, a default-constructed workflow and finally a subscription
        linking the fileset to the workflow.
        """
        dummyFile = File('/tmp/dummyfile', 9999, 0, 0, 0)
        self.dummyFile = dummyFile
        self.dummySet = set([dummyFile])

        self.dummyFileSet = Fileset(name='SubscriptionTestFileset',
                                    files=self.dummySet)
        self.dummyWorkFlow = Workflow()

        self.dummySubscription = Subscription(workflow=self.dummyWorkFlow,
                                              fileset=self.dummyFileSet)
Esempio n. 16
0
    def __init__(self, fileset = None, workflow = None, id = -1,
                 split_algo = "FileBased", type = "Processing"):
        """
        Initialise the subscription on top of WMBSBase and WMSubscription.

        fileset    -- fileset to subscribe to; when None a new Fileset() is
                      created here.
        workflow   -- workflow attached to the subscription; when None a new
                      Workflow() is created here.
        id         -- value stored under the "id" key if that key has not
                      been set already.
        split_algo -- splitting algorithm name passed on to WMSubscription.
        type       -- subscription type passed on to WMSubscription.
        """
        WMBSBase.__init__(self)

        # If a fileset or workflow isn't passed in the base class will create
        # empty non-WMBS filesets and workflows.  We want WMBS filesets and
        # workflows so we'll create those here.  The check is an identity
        # test so that a custom __eq__ on the argument is never consulted.
        if fileset is None:
            fileset = Fileset()
        if workflow is None:
            workflow = Workflow()

        WMSubscription.__init__(self, fileset = fileset, workflow = workflow,
                                split_algo = split_algo, type = type)

        # setdefault keeps an "id" that the base initialisation may have set.
        self.setdefault("id", id)

        # presumably the batch size for bulk deletions -- TODO confirm usage
        self.bulkDeleteLimit = 500
        return
Esempio n. 17
0
    def processDataset(self):
        """
        _processDataset_

        Import the Dataset contents and create a set of jobs from it.

        Every file listed by DBS for the input dataset is added to a fileset
        together with the locations of its block, the fileset is split with
        the FileBased algorithm using self.splitSize files per job, and one
        JobDefinition is built per resulting job.

        Returns the list of JobDefinition objects.
        """

        #  //
        # // Now create the job definitions
        #//
        logging.debug("SplitSize = %s", self.splitSize)
        logging.debug("AllowedSites = %s", self.allowedSites)
        thefiles = Fileset(name='FilesToSplit')
        reader = DBSReader(self.dbsUrl)
        fileList = reader.dbs.listFiles(
            analysisDataset=self.inputDataset(),
            retriveList=['retrive_block', 'retrive_run'])

        # Block name -> locations, so each block is looked up only once
        blocks = {}

        for f in fileList:
            block = f['Block']['Name']
            if block not in blocks:
                blocks[block] = reader.listFileBlockLocation(block)
            f['Block']['StorageElementList'].extend(blocks[block])
            wmbsFile = File(f['LogicalFileName'])
            for location in blocks[block]:
                wmbsFile['locations'].add(location)
            wmbsFile['block'] = block
            thefiles.addFile(wmbsFile)

        work = Workflow()
        subs = Subscription(fileset=thefiles,
                            workflow=work,
                            split_algo='FileBased',
                            type="Processing")
        splitter = SplitterFactory()
        jobfactory = splitter(subs)

        jobs = jobfactory(files_per_job=self.splitSize)

        jobDefs = []
        for job in jobs.jobs:
            jobDef = JobDefinition()
            jobDef['LFNS'].extend(job.listLFNs())
            # SkipEvents = 0 and MaxEvents = -1: no event range is imposed
            # by the splitting itself.
            jobDef['SkipEvents'] = 0
            jobDef['MaxEvents'] = -1
            for jobFile in job.listFiles():
                jobDef['SENames'].extend(list(jobFile['locations']))
            jobDefs.append(jobDef)

        return jobDefs
Esempio n. 18
0
    def execute(self, *args, **kwargs):
        """
        Split the fileset given as args[0] into jobs for kwargs['task'].

        The task entries tm_taskname, tm_split_algo, tm_job_type and
        tm_split_args are always read; tm_totalunits, tm_events_per_lumi and
        tm_generator are read depending on the job type.  The splitting
        arguments dictionary of the task is updated in place before being
        passed to the job factory.

        If the job factory exposes duplicated lumis through its lumiChecker,
        a warning is logged and posted to the REST interface; a failure to
        post is logged and otherwise ignored.

        Raises TaskWorkerException when the splitting produces no job.
        """
        task = kwargs['task']
        wmwork = Workflow(name=task['tm_taskname'])

        wmsubs = Subscription(
            fileset=args[0],
            workflow=wmwork,
            split_algo=task['tm_split_algo'],
            type=self.jobtypeMapper[task['tm_job_type']])
        splitter = SplitterFactory()
        jobfactory = splitter(subscription=wmsubs)
        splitparam = task['tm_split_args']
        splitparam['algorithm'] = task['tm_split_algo']
        if task['tm_job_type'] == 'Analysis':
            if task['tm_split_algo'] == 'FileBased':
                splitparam['total_files'] = task['tm_totalunits']
            elif task['tm_split_algo'] == 'LumiBased':
                splitparam['total_lumis'] = task['tm_totalunits']
        elif task['tm_job_type'] == 'PrivateMC':
            if 'tm_events_per_lumi' in task and task['tm_events_per_lumi']:
                splitparam['events_per_lumi'] = task['tm_events_per_lumi']
            if 'tm_generator' in task and task['tm_generator'] == 'lhe':
                splitparam['lheInputFiles'] = True
        splitparam['applyLumiCorrection'] = True
        factory = jobfactory(**splitparam)
        if len(factory) == 0:
            # The placeholders of the message must actually be filled in; the
            # dataset is looked up defensively so that a missing entry cannot
            # hide the real error behind a KeyError.
            dataset = None
            if 'tm_input_dataset' in task:
                dataset = task['tm_input_dataset']
            raise TaskWorkerException(
                "The CRAB3 server backend could not submit any job to the Grid scheduler:\n"
                + ("splitting task %s on dataset %s with %s method does not generate any job"
                   % (task['tm_taskname'], dataset, task['tm_split_algo'])))
        #printing duplicated lumis if any
        lumiChecker = getattr(jobfactory, 'lumiChecker', None)
        if lumiChecker and lumiChecker.splitLumiFiles:
            self.logger.warning(
                "The input dataset contains the following duplicated lumis %s"
                % lumiChecker.splitLumiFiles.keys())
            try:
                configreq = {
                    'subresource':
                    'addwarning',
                    'workflow':
                    task['tm_taskname'],
                    'warning':
                    b64encode(
                        'The CRAB3 server backend detected lumis split across files in the input dataset.'
                        ' Will apply the necessary corrections in the splitting algorithms'
                    )
                }
                self.server.post(self.restURInoAPI + '/task',
                                 data=urllib.urlencode(configreq))
            except Exception as e:
                # Not every exception carries HTTP headers; fall back to the
                # exception itself so the handler cannot fail on its own.
                self.logger.error(getattr(e, 'headers', e))
                self.logger.warning(
                    "Cannot add warning to REST after finding duplicates")
        # NOTE(review): this method falls off the end and returns None, the
        # factory output is never handed back -- confirm that callers do not
        # expect a result object here.
Esempio n. 19
0
    def setUp(self):
        """
        _setUp_

        Create three subscriptions that all use the "EventBased" splitting
        algorithm: one over ten files with 100 events each, one over a single
        file with 100 events and one over a single file with zero events.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for _ in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('se01')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('se02')
        self.singleFileFileset.addFile(newFile)

        self.emptyFileFileset = Fileset(name="TestFileset3")
        newFile = File("/some/file/name", size=1000, events=0)
        # This used to call the dict-style setdefault('se03'), which does not
        # go through setLocation() like the other two files do, leaving the
        # zero-event file without a location.
        newFile.setLocation('se03')
        self.emptyFileFileset.addFile(newFile)

        # One workflow is shared by all three subscriptions.
        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")
        self.emptyFileSubscription = Subscription(
            fileset=self.emptyFileFileset,
            workflow=testWorkflow,
            split_algo="EventBased",
            type="Processing")

        return
Esempio n. 20
0
    def testF_HardLimitSplittingOnly(self):
        """
        _testF_HardLimitSplittingOnly_

        Split three files that each carry 1000 events in a single lumi while
        the splitter is capped at 800 events per lumi, and verify that every
        resulting job is flagged as failed on creation with the expected
        reason.
        """
        factoryBuilder = SplitterFactory()

        # Build the three single-big-lumi files first, then attach them to
        # the fileset in the same order.
        bigLumiFiles = [
            self.createFile("/this/is/file1", 1000, 0, 1, "somese.cern.ch"),
            self.createFile("/this/is/file2", 1000, 1, 1, "somese.cern.ch"),
            self.createFile("/this/is/file3", 1000, 2, 1, "somese.cern.ch"),
        ]
        bigLumiFileset = Fileset(name="FilesetA")
        for bigLumiFile in bigLumiFiles:
            bigLumiFileset.addFile(bigLumiFile)

        subscription = Subscription(fileset=bigLumiFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
        jobFactory = factoryBuilder(package="WMCore.DataStructs",
                                    subscription=subscription)

        # Halt on file boundaries, aim for 550 events per job and cap a
        # single lumi at 800 events.
        splittingArgs = dict(halt_job_on_file_boundaries=True,
                             splitOnRun=True,
                             events_per_job=550,
                             max_events_per_lumi=800,
                             performance=self.performanceParams)
        jobGroups = jobFactory(**splittingArgs)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3, "Three jobs must be in the jobgroup")

        # The n-th job is expected to report the n-th file (1-based).
        for fileNumber, job in enumerate(jobs, 1):
            self.assertTrue(
                job['failedOnCreation'],
                "The job processing the second file should me marked for failure"
            )
            expectedReason = (
                "File /this/is/file%d has too many events (1000) in 1 lumi(s)"
                % fileNumber)
            self.assertEqual(job['failedReason'], expectedReason,
                             "The reason for the failure is not accurate")

        return
Esempio n. 21
0
    def testHardLimitSplitting(self):
        """
        _testHardLimitSplitting_

        Run the EventAwareLumiByWork splitter with a job time limit over three
        files and verify that only the job for the middle file (1000 events
        in a single lumi) is marked as failed on creation.
        """
        factoryBuilder = SplitterFactory()

        # Three input files; the second one is the "bad" one.
        inputFiles = [
            self.createFile("/this/is/file1", 1000, 0, 5, "blenheim"),
            self.createFile("/this/is/file2", 1000, 1, 1, "blenheim"),
            self.createFile("/this/is/file3", 1000, 2, 2, "blenheim"),
        ]
        inputFileset = Fileset(name="FilesetA")
        for inputFile in inputFiles:
            inputFileset.addFile(inputFile)

        subscription = Subscription(fileset=inputFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
        jobFactory = factoryBuilder(package="WMCore.DataStructs",
                                    subscription=subscription)

        # Halt on file boundaries, aim for 550 events per job and apply a
        # job time limit of 9600.
        splittingArgs = dict(halt_job_on_file_boundaries=True,
                             splitOnRun=True,
                             events_per_job=550,
                             job_time_limit=9600,
                             performance=self.performanceParams)
        jobGroups = jobFactory(**splittingArgs)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        # Only the third job (index 2) may be failed; all others must be fine.
        for goodIndex in (0, 1, 3, 4):
            self.assertFalse(jobs[goodIndex].get('failedOnCreation'))

        failedJob = jobs[2]
        self.assertTrue(failedJob['failedOnCreation'])
        expectedReason = 'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it woud take 12000 sec to run'
        self.assertEqual(failedJob['failedReason'], expectedReason)

        return
Esempio n. 22
0
 def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                        lastEvent = 100, firstLumi = 1, lastLumi = 10):
     """
     Build a "Production" subscription, split with "EventBased", over a
     fileset holding one fake MC file.

     The file reports numEvents events, carries run 1 with the lumis
     firstLumi..lastLumi (inclusive) and stores firstEvent/lastEvent under
     its "first_event"/"last_event" keys.
     """
     # MC input is represented by exactly one fake file
     mcFileset = Fileset(name = "MCTestFileset")

     fakeFile = File("MCFakeFileTest", size = 1000, events = numEvents)
     fakeFile.setLocation('se01')
     lumiNumbers = range(firstLumi, lastLumi + 1)
     fakeFile.addRun(Run(1, *lumiNumbers))
     fakeFile["first_event"] = firstEvent
     fakeFile["last_event"] = lastEvent

     mcWorkflow = Workflow()
     mcFileset.addFile(fakeFile)
     return Subscription(fileset = mcFileset,
                         workflow = mcWorkflow,
                         split_algo = "EventBased",
                         type = "Production")
Esempio n. 23
0
    def testCreate(self):
        """
        _testCreate_

        Exercise the JobGroup constructor with a subscription, with a list of
        jobs and with a single job.  Jobs handed to the constructor must show
        up in newjobs while jobs stays empty.
        """
        subscription = Subscription()
        groupWithSubscription = JobGroup(subscription = subscription)

        assert groupWithSubscription.subscription == subscription, \
            "ERROR: Failed to pass subscription in constructor"
        noCommittedJobs = len(groupWithSubscription.jobs) == 0
        noPendingJobs = len(groupWithSubscription.newjobs) == 0
        assert noCommittedJobs and noPendingJobs, \
            "ERROR: JobGroup not empty on creation"

        firstJob = Job()
        secondJob = Job()

        # Constructor given a list of jobs
        groupFromList = JobGroup(jobs = [firstJob, secondJob])

        assert groupFromList.jobs == [], \
            "ERROR: Jobs committed to jobgroup too soon."

        # Tick off each pending job against the expected ones; anything
        # left over afterwards is a job that went missing.
        expectedJobs = [firstJob, secondJob]
        for pendingJob in groupFromList.newjobs:
            assert pendingJob in expectedJobs, \
                "ERROR: Extra job in job group"

            expectedJobs.remove(pendingJob)

        assert len(expectedJobs) == 0, \
            "ERROR: Job missing from job group"

        # Constructor given a single job rather than a list
        groupFromSingleJob = JobGroup(jobs = firstJob)

        assert groupFromSingleJob.jobs == [], \
            "ERROR: Jobs committed to jobgroup too soon."

        pendingJobs = groupFromSingleJob.newjobs

        assert len(pendingJobs) == 1, \
            "ERROR: Wrong number of jobs in jobgroup."
        assert firstJob in pendingJobs, \
            "ERROR: Wrong job in jobgroup."

        return
Esempio n. 24
0
    def setUp(self):
        """
        Fixture for the Subscription test case.

        Builds a dummy file, a set containing it, a fileset created from that
        set, a default-constructed Workflow and a Subscription tying the
        fileset and the workflow together.
        """
        self.dummyFile = File('/tmp/dummyfile',9999,0,0,0)
        self.dummySet = set([self.dummyFile])
        self.dummyWorkFlow = Workflow()
        self.dummyFileSet = Fileset(name = 'SubscriptionTestFileset',
                                    files = self.dummySet)
        self.dummySubscription = Subscription(workflow = self.dummyWorkFlow,
                                              fileset = self.dummyFileSet)
        return
Esempio n. 25
0
    def testE_HardLimitSpltting(self):
        """
        _testE_HardLimitSplitting_

        Run the EventAwareLumiBased splitter with a per-lumi event limit over
        three files and verify that the job for the middle file (1000 events
        in a single lumi) is marked as failed on creation.
        """
        factoryBuilder = SplitterFactory()

        # Three input files; the second one is the "bad" one.
        inputFiles = [
            self.createFile("/this/is/file1", 1000, 0, 5, "blenheim"),
            self.createFile("/this/is/file2", 1000, 1, 1, "blenheim"),
            self.createFile("/this/is/file3", 1000, 2, 2, "blenheim"),
        ]
        inputFileset = Fileset(name="FilesetA")
        for inputFile in inputFiles:
            inputFileset.addFile(inputFile)

        subscription = Subscription(fileset=inputFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
        jobFactory = factoryBuilder(package="WMCore.DataStructs",
                                    subscription=subscription)

        # Halt on file boundaries, aim for 550 events per job and cap a
        # single lumi at 800 events.
        splittingArgs = dict(halt_job_on_file_boundaries=True,
                             splitOnRun=True,
                             events_per_job=550,
                             max_events_per_lumi=800,
                             performance=self.performanceParams)
        jobGroups = jobFactory(**splittingArgs)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")

        # The fourth job (index 3) is the one expected to fail.
        failedJob = jobs[3]
        self.assertTrue(
            failedJob['failedOnCreation'],
            "The job processing the second file should me marked for failure")
        self.assertEqual(
            failedJob['failedReason'],
            "File /this/is/file2 has too many events (1000) in 1 lumi(s)",
            "The reason for the failure is not accurate")

        return
Esempio n. 26
0
    def testI_DisableHardLimitSplitting(self):
        """
        _testI_DisableHardLimitSplitting_

        With allowCreationFailure set to False the job time limit must not
        cause any job to be marked as failed on creation, even for a file
        holding 1000 events in a single lumi.
        """
        factoryBuilder = SplitterFactory()

        # Three input files; the second one is the "bad" one.
        inputFiles = [
            self.createFile("/this/is/file1", 1000, 0, 5, "blenheim"),
            self.createFile("/this/is/file2", 1000, 1, 1, "blenheim"),
            self.createFile("/this/is/file3", 1000, 2, 2, "blenheim"),
        ]
        inputFileset = Fileset(name="FilesetA")
        for inputFile in inputFiles:
            inputFileset.addFile(inputFile)

        subscription = Subscription(fileset=inputFileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiBased",
                                    type="Processing")
        jobFactory = factoryBuilder(package="WMCore.DataStructs",
                                    subscription=subscription)

        # Halt on file boundaries, aim for 550 events per job, apply a job
        # time limit of 9600 and disallow failing jobs at creation time.
        splittingArgs = dict(halt_job_on_file_boundaries=True,
                             splitOnRun=True,
                             events_per_job=550,
                             job_time_limit=9600,
                             allowCreationFailure=False,
                             performance=self.performanceParams)
        jobGroups = jobFactory(**splittingArgs)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")

        # Collect every job flagged as failed; none is expected.
        failedJobs = []
        for candidate in jobs:
            if candidate.get('failedOnCreation', False):
                failedJobs.append(candidate)
        self.assertEqual(len(failedJobs), 0, "There should be no failed jobs")

        return
Esempio n. 27
0
    def createSubscriptions(self, task, fileset):
        """
        Create a "FileBased" subscription for a task over the given fileset.

        The subscription type comes from task.taskType() and its workflow
        from task.makeWorkflow().  The package returned by
        self.createWMBSJobs() is saved as '<task name>JobPackage.pkl' under
        the 'packages' directory of self.testDir.  Returns the subscription.
        """
        subscriptionType = task.taskType()
        workflow = task.makeWorkflow()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo="FileBased",
                                    type=subscriptionType)

        jobPackage = self.createWMBSJobs(subscription=subscription, task=task)

        packageFile = '%sJobPackage.pkl' % (task.name())
        jobPackage.save(os.path.join(self.testDir, 'packages', packageFile))

        return subscription
Esempio n. 28
0
    def testMetaData(self):
        """
        _testMetaData_

        Verify that task, workflow name, owner and the site white and black
        lists are propagated into every job the factory creates.
        """
        workflow = Workflow(spec="spec.pkl",
                            owner="Steve",
                            name="TestWorkflow",
                            task="TestTask")

        fileset = Fileset(name="TestFileset")
        fileset.addFile(File(lfn="someLFN"))
        fileset.commit()

        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo="FileBased")

        factory = JobFactory(subscription=subscription)
        jobGroups = factory(siteWhitelist=["site1"],
                            siteBlacklist=["site2"])
        self.assertTrue(len(jobGroups) > 0)

        # (job key, expected value, failure message), checked in this order
        expectations = (
            ("task", "TestTask", "Error: Task is wrong."),
            ("workflow", "TestWorkflow", "Error: Workflow is wrong."),
            ("owner", "Steve", "Error: Owner is wrong."),
            ("siteWhitelist", ["site1"], "Error: Site white list is wrong."),
            ("siteBlacklist", ["site2"], "Error: Site black list is wrong."),
        )

        for jobGroup in jobGroups:
            self.assertTrue(len(jobGroup.jobs) > 0)
            for job in jobGroup.jobs:
                for key, expected, message in expectations:
                    self.assertEqual(job[key], expected, message)
        return
Esempio n. 29
0
    def oneHundredFiles(self,
                        splittingAlgo="EventBased",
                        jobType="Processing"):
        """
        _oneHundredFiles_

        Build a fileset of 100 files, subscribe to it with the requested
        splitting algorithm and job type, split it with events_per_job=100
        and return the result of the split.  The "RandomSeeder" and
        "RunAndLumiSeeder" generators are registered on self.manager before
        returning.
        """
        hundredFiles = Fileset(name='EventBasedFiles1')
        for index in range(100):
            lfn = "/store/MultipleFileSplit%s.root" % index
            # positional arguments: lfn, size, events, run, lumi
            newFile = File(lfn, 1000, 100, 10 + index, 12312)
            newFile['locations'].add("BULLSHIT")

            hundredFiles.addFile(newFile)

        workflow = Workflow()
        subscription = Subscription(fileset=hundredFiles,
                                    workflow=workflow,
                                    split_algo=splittingAlgo,
                                    type=jobType)
        factoryBuilder = SplitterFactory()
        jobFactory = factoryBuilder(subscription)
        splitResult = jobFactory(events_per_job=100)

        self.manager.addGenerator("RandomSeeder", **self.seedlistForRandom)
        self.manager.addGenerator("RunAndLumiSeeder")

        return splitResult
Esempio n. 30
0
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False):
        """
        _createSubscription_

        Build a "LumiBased" processing subscription over nFiles files located
        at 'blenheim', each holding lumisPerFile lumis in a run numbered after
        the file index.  When twoSites is true, a second batch of nFiles files
        located at 'malpaquet' with a different lumi numbering is added to
        the same fileset.
        """
        prefix = makeUUID()
        lumiOffsets = range(lumisPerFile)

        fileset = Fileset(name=prefix)
        for fileIndex in range(nFiles):
            firstSiteFile = File(lfn='%s_%i' % (prefix, fileIndex),
                                 size=1000,
                                 events=100)
            lumiNumbers = [(fileIndex * 100) + offset
                           for offset in lumiOffsets]
            firstSiteFile.addRun(Run(fileIndex, *lumiNumbers))
            firstSiteFile.setLocation('blenheim')
            fileset.addFile(firstSiteFile)

        if twoSites:
            for fileIndex in range(nFiles):
                secondSiteFile = File(lfn='%s_%i_2' % (prefix, fileIndex),
                                      size=1000,
                                      events=100)
                # these lumi numbers must differ from the first batch
                lumiNumbers = [5 + 10 * (fileIndex * 100) + offset
                               for offset in lumiOffsets]
                secondSiteFile.addRun(Run(fileIndex, *lumiNumbers))
                secondSiteFile.setLocation('malpaquet')
                fileset.addFile(secondSiteFile)

        return Subscription(fileset=fileset,
                            workflow=self.testWorkflow,
                            split_algo="LumiBased",
                            type="Processing")
Esempio n. 31
0
    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Split the same three-file subscription three times with different run
        white lists and check that every job only covers white-listed runs.
        """
        factoryBuilder = SplitterFactory()

        # Three files with 100 events per lumi:
        # - file1: run 1 with 8 lumis
        # - file2: runs 2 and 3 with 2 lumis each
        # - file3: run 4 with 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        fileA.addRun(Run(1, *[10 + offset for offset in range(8)]))
        fileA.setLocation("somese.cern.ch")

        fileB.addRun(Run(2, *[20 + offset for offset in range(2)]))
        fileB.addRun(Run(3, *[30 + offset for offset in range(2)]))
        fileB.setLocation("somese.cern.ch")

        fileC.addRun(Run(4, *[40 + offset for offset in range(5)]))
        fileC.setLocation("somese.cern.ch")

        fileset = Fileset(name='Fileset')
        for inputFile in (fileA, fileB, fileC):
            fileset.addFile(inputFile)

        subscription = Subscription(fileset=fileset,
                                    workflow=self.testWorkflow,
                                    split_algo="EventAwareLumiByWork",
                                    type="Processing")
        jobFactory = factoryBuilder(package="WMCore.DataStructs",
                                    subscription=subscription)

        def checkRuns(jobList, allowedRuns):
            # every run present in a job mask has to be white-listed
            for job in jobList:
                for run in job['mask'].getRunAndLumis().keys():
                    self.assertIn(run, allowedRuns)

        # First split: no breaks on runs or files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        checkRuns(jobs, [1, 4])

        # Second split: break on runs
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        checkRuns(jobs, [1, 3, 4])

        # Third split: break on files
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        checkRuns(jobs, [1, 2, 3])
Esempio n. 32
0
class FixedDelayTest(unittest.TestCase):
    """
    Tests for the "FixedDelay" job splitting algorithm, run against four
    subscriptions built in setUp.
    """

    def setUp(self):
        """
        _setUp_

        Create four "FixedDelay" subscriptions sharing one workflow:
        - multipleFileSubscription: ten files, file i in run i with lumi 45+i
        - singleFileSubscription: one file in run 1, lumi 45
        - multipleLumiSubscription: ten files in run 1, lumi 45 + i // 3
        - singleLumiSubscription: ten files in run 1, all with lumi 45
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(i, 45 + i))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.addRun(Run(1, 45))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            # Floor division keeps the lumi number an integer on Python 3
            # as well; on Python 2 the result is the same as with "/".
            newFile.addRun(Run(1, 45 + i // 3))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, 45))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        return

    def tearDown(self):
        """
        _tearDown_

        Nothing to do...
        """
        pass

    def testNone(self):
        """
        _testNone_

        With a trigger time far in the future (twice the current epoch time)
        no subscription should produce any job group.
        """
        splitter = SplitterFactory()
        subscriptions = (self.singleFileSubscription,
                         self.multipleFileSubscription,
                         self.multipleLumiSubscription,
                         self.singleLumiSubscription)
        for subscription in subscriptions:
            jobFactory = splitter(subscription)
            jobGroups = jobFactory(trigger_time=int(time.time()) * 2)
            self.assertEqual(jobGroups, [], "Should have returned a null set")

        return

    def testClosed(self):
        """
        _testClosed_

        Since the filesets are closed and none of the files have been
        acquired, every subscription should yield one job group with a single
        job containing all of its files.
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup.")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job.")

        job = jobGroups[0].jobs.pop()
        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                         "ERROR: Job contains unknown files.")

        self.multipleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)

        self.multipleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)

        self.singleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup.")
        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job.")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)

    def testAllAcquired(self):
        """
        _testAllAcquired_

        Once every available file has been acquired, no subscription should
        return any job group.
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.acquireFiles(
            self.singleFileSubscription.availableFiles())
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [],
                         "Should have returned a null set: %s" % jobGroups)

        self.multipleFileSubscription.acquireFiles(
            self.multipleFileSubscription.availableFiles())
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        self.multipleLumiSubscription.acquireFiles(
            self.multipleLumiSubscription.availableFiles())
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.acquireFiles(
            self.singleLumiSubscription.availableFiles())
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_

        Acquire one file per subscription before splitting.  The single-file
        subscription then has nothing left and must return no job groups;
        each ten-file subscription, with its fileset closed, must return one
        job holding the remaining nine files.
        """
        splitter = SplitterFactory()
        # NOTE(review): this closes the *multiple* file fileset right before
        # the single-file check; singleFileSubscription may have been
        # intended. Behaviour is kept as it was - confirm.
        self.multipleFileSubscription.getFileset().markOpen(False)
        self.singleFileSubscription.acquireFiles(
            [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time=1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        tenFileSubscriptions = (self.multipleFileSubscription,
                                self.multipleLumiSubscription,
                                self.singleLumiSubscription)
        for subscription in tenFileSubscriptions:
            subscription.getFileset().markOpen(False)
            subscription.acquireFiles([subscription.availableFiles().pop()])
            jobFactory = splitter(subscription)
            jobGroups = jobFactory(trigger_time=1)
            self.assertEqual(len(jobGroups), 1,
                             "Should have gotten one jobGroup")
            self.assertEqual(len(jobGroups[0].jobs), 1,
                             "JobFactory should have made one job")
            myfiles = jobGroups[0].jobs[0].getFiles()
            self.assertEqual(len(myfiles), 9,
                             "JobFactory should have provides us with 9 files")
Esempio n. 33
0
    def testD_NoFileSplitNoHardLimit(self):
        """
        _testD_NoFileSplitNoHardLimit_

        Run the splitter with halt_job_on_file_boundaries=False, so that jobs
        are not forced to end at file boundaries, and check the resulting jobs
        for several file/lumi/event layouts (e.g. many zero-event files are
        expected to be packed into a single job).
        """
        splitter = SplitterFactory()

        # Scenario 1: 100 files with 7 lumis each and no events at all.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Ask for 360 events per job.  Since no file carries any events, the
        # expectation is a single job containing every file.
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100,
                         "All 100 files must be in the job")

        # Scenario 2: seven files with different lumi/event distributions.
        # NOTE(review): the createFile arguments look like
        # (lfn, events, run, number of lumis, location) - confirm against the
        # helper, which is defined outside this method.
        fileSpecs = [
            ("/this/is/file1", 250, 0, 5),
            ("/this/is/file2", 600, 1, 1),
            ("/this/is/file3", 1200, 2, 2),
            ("/this/is/file4", 100, 3, 1),
            ("/this/is/file5", 30, 4, 1),
            ("/this/is/file6", 10, 5, 1),
            ("/this/is/file7", 151, 6, 3),
        ]
        testFileset = Fileset(name="FilesetA")
        # Create every file first and add them afterwards, in the same order.
        testFiles = [self.createFile(lfn, events, run, lumis, "blenheim")
                     for lfn, events, run, lumis in fileSpecs]
        for testFile in testFiles:
            testFileset.addFile(testFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Target: 150 events per job.  The first file is expected to be split
        # at 3 lumis per job, which leaves room for one more lumi in the second
        # job, but the single lumi of the second file is too big to fit there.
        # The third job holds only the second file, and the fourth and fifth
        # jobs split the third file.
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 8, "Eight jobs must be in the jobgroup")

        # Expected {run: [[firstLumi, lastLumi]]} mask of each job, in order.
        # Plain ints are used instead of Python 2 long literals: they compare
        # (and hash) equal to longs, and also parse on Python 3.
        expectedMasks = [
            ("first", {0: [[0, 2]]}),
            ("second", {0: [[3, 4]]}),
            ("third", {1: [[1, 1]]}),
            ("fourth", {2: [[4, 4]]}),
            ("fifth", {2: [[5, 5]]}),
            ("sixth", {3: [[3, 3]], 4: [[4, 4]], 5: [[5, 5]]}),
            ("seventh", {6: [[18, 19]]}),
            ("eighth", {6: [[20, 20]]}),
        ]
        for job, (ordinal, expectedMask) in zip(jobs, expectedMasks):
            self.assertEqual(job["mask"].getRunAndLumis(), expectedMask,
                             "Wrong mask for the %s job" % ordinal)

        # Scenario 3: interaction with splitOnRun=True.
        # fileA carries runs 1, 2 and 3 (lumis 1..8 in each); fileB is created
        # with run 3, i.e. the last run of fileA.
        lumis = list(range(1, 9))
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        for runNumber in (1, 2, 3):
            fileA.addRun(Run(runNumber, *lumis))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)
        # Target: 700 events per job, additionally splitting on run changes.
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
Esempio n. 34
0
class SubscriptionTest(unittest.TestCase):
    """
    _SubscriptionTest_

    Testcase for the Subscription class.
    """

    def setUp(self):
        """
        Initial setup for the Subscription testcase.

        Build a Subscription whose fileset holds a single file and whose
        workflow comes from the default constructor of the Workflow class.
        """
        self.dummyFile = File('/tmp/dummyfile', 9999, 0, 0, 0)
        self.dummySet = set([self.dummyFile])
        self.dummyFileSet = Fileset(name='SubscriptionTestFileset',
                                    files=self.dummySet)
        self.dummyWorkFlow = Workflow()
        self.dummySubscription = Subscription(fileset=self.dummyFileSet,
                                              workflow=self.dummyWorkFlow)
        return

    def tearDown(self):
        """Nothing to clean up: the fixtures are rebuilt by setUp."""
        pass

    def _makeRandomFiles(self):
        """
        Return a list of between 99 and 999 File objects with random lfn,
        size, run and lumi; every file has 1000 events and a fixed checksum.
        """
        randomFiles = []
        for _ in range(1, random.randint(100, 1000)):
            lfn = '/store/data/%s/%s/file.root' % (random.randint(1000, 9999),
                                                   random.randint(1000, 9999))
            size = random.randint(1000, 2000)
            events = 1000
            run = random.randint(0, 2000)
            lumi = random.randint(0, 8)

            newFile = File(lfn=lfn, size=size, events=events,
                           checksums={"cksum": "1"})
            newFile.addRun(Run(run, lumi))
            randomFiles.append(newFile)
        return randomFiles

    def testGetWorkflow(self):
        """
        The subscription must hold the workflow it was constructed with.
        """
        assert self.dummySubscription['workflow'] == self.dummyWorkFlow, \
            "Couldn't add Workflow to Subscription"

    def testGetFileset(self):
        """
        The subscription must hold the fileset it was constructed with:
        same name, same new files and same overall set of files.
        """
        subscriptionFileset = self.dummySubscription['fileset']

        assert self.dummyFileSet.name == subscriptionFileset.name, \
            "Couldn't add Fileset to Subscription - name does not match"

        for newFile in self.dummyFileSet.listNewFiles():
            assert newFile in subscriptionFileset.newfiles, \
                "Couldn't add Fileset to Subscription - newFiles Set does not match"

        # The failure message used to be formatted with the loop variable of
        # the loop above, which is unbound when there are no new files.
        assert self.dummyFileSet.getFiles(type='set') == \
            subscriptionFileset.getFiles(type='set'), \
            "Couldn't add Fileset to Subscription - files Set does not match"

    def testAcquireFiles(self):
        """
        Testcase for the acquireFiles method of the Subscription class.
        """
        # Acquire whatever is already sitting in the available set.
        self.dummySubscription.acquireFiles()

        # First test - a file placed in the available set is returned by an
        # argument-less acquireFiles() call.
        # NOTE(review): the comma sits inside the string literal, so the lfn
        # is '/tmp/dummyfile2,8888' - looks like a typo, confirm the intent.
        dummyFile2 = File('/tmp/dummyfile2,8888', 1, 1, 1)
        self.dummySubscription.available.addFile(dummyFile2)

        S = self.dummySubscription.available.listNewFiles()
        # The returned set must match what was in the available fileset.
        assert S == self.dummySubscription.acquireFiles(), \
            "Couldn't acquire file using method acquireFiles - (no arguments test)"

        # Second test - explicitly passed files end up in the acquired set.
        dummyFileList = set(self._makeRandomFiles())

        # The return value must be exactly the files that were passed in.
        acqFiles = self.dummySubscription.acquireFiles(files=dummyFileList)
        assert acqFiles == dummyFileList, \
            'Return value for acquireFiles method not the acquired files'
        # Every file must now be in the subscription's acquired set.
        for x in dummyFileList:
            # NOTE(review): x.dict is only evaluated when the assertion fails;
            # confirm that File really exposes a `dict` attribute.
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                "Couldn't acquire File %s" % x.dict['lfn']

        # Third test - acquiring a file removes it from the other sets.
        dummyFile3 = File('/tmp/dummyfile3,5555', 1, 1, 1)
        dummyFileList = [dummyFile3]

        # Seed every other set with dummyFile3 before acquiring it.
        self.dummySubscription.available.addFile(dummyFile3)
        self.dummySubscription.failed.addFile(dummyFile3)
        self.dummySubscription.completed.addFile(dummyFile3)

        self.dummySubscription.acquireFiles(files=dummyFileList, size=1)

        assert dummyFile3 in self.dummySubscription.acquired.getFiles(type='set'), \
            "Replicated file could'nt be inserted at acquired Set"

        assert dummyFile3 not in self.dummySubscription.available.getFiles(type='set'), \
            'Acquired file still present at available Set'
        assert dummyFile3 not in self.dummySubscription.failed.getFiles(type='set'), \
            'Acquired file still present at failed Set'
        assert dummyFile3 not in self.dummySubscription.completed.getFiles(type='set'), \
            'Acquired file still present at completed Set'

        # Fourth test - a size argument that does not match the number of
        # files given must not prevent them from being acquired.

        # Case 1: size < number of files given as an argument
        dummyFileList = [File('/tmp/dummyfile' + str(i), 7656, 1, 1, 1)
                         for i in range(90, 100)]

        self.dummySubscription.acquireFiles(files=dummyFileList, size=1)
        for x in dummyFileList:
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                "File wasn't acquired (lower Size argument test)"

        # Case 2: size = 0
        self.dummySubscription.acquireFiles(files=dummyFileList, size=0)
        for x in dummyFileList:
            assert x in self.dummySubscription.acquired.getFiles(type='set'), \
                "File wasn't acquired (zero size argument test)"

    def testCompleteFiles(self):
        """
        Testcase for the completeFiles method of the Subscription class.
        """
        # Start by completing an empty list of files.
        self.dummySubscription.completeFiles([])

        # First test - after completing everything that is available, no file
        # may be reported as available any more.
        dummyFile2 = File('/tmp/dummyfile2,8888', 1, 1, 1)
        self.dummySubscription.available.addFile(dummyFile2)

        S = self.dummySubscription.availableFiles()
        self.dummySubscription.completeFiles(S)

        assert len(self.dummySubscription.availableFiles()) == 0, \
            "completed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        # Second test - explicitly passed files end up in the completed set.
        dummyFileList = self._makeRandomFiles()
        # Make the new files available, then complete them.
        self.dummySubscription.available.addFile(dummyFileList)
        self.dummySubscription.completeFiles(files=dummyFileList)
        assert len(self.dummySubscription.availableFiles()) == 0, \
            "completed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        for x in dummyFileList:
            assert x in self.dummySubscription.completed.getFiles(type='set'), \
                "Couldn't make file completed %s" % x.dict['lfn']

        # Third test - completing a file removes it from the other sets.
        dummyFile3 = File('/tmp/dummyfile3,5555', 1, 1, 1)
        dummyFileList = [dummyFile3]

        # NOTE(review): the file is seeded into `completed` rather than
        # `available`, although `available` is what is checked below -
        # possibly a copy/paste slip; confirm before changing.
        self.dummySubscription.acquired.addFile(dummyFile3)
        self.dummySubscription.failed.addFile(dummyFile3)
        self.dummySubscription.completed.addFile(dummyFile3)

        self.dummySubscription.completeFiles(files=dummyFileList)

        assert dummyFile3 in self.dummySubscription.completed.getFiles(type='set'), \
            "Replicated file could'nt be inserted at completed Set"

        assert dummyFile3 not in self.dummySubscription.acquired.getFiles(type='set'), \
            'Completed file still present at acquired Set'
        assert dummyFile3 not in self.dummySubscription.failed.getFiles(type='set'), \
            'Completed file still present at failed Set'
        assert dummyFile3 not in self.dummySubscription.available.getFiles(type='set'), \
            'Completed file still present at available Set'

    def testFailFiles(self):
        """
        Testcase for the failFiles method of the Subscription class.
        """
        # Start by failing an empty list of files.
        self.dummySubscription.failFiles([])

        # First test - after failing everything that is available, no file
        # may be reported as available any more.
        dummyFile2 = File('/tmp/dummyfile2,8888', 1, 1, 1)
        self.dummySubscription.available.addFile(dummyFile2)

        S = self.dummySubscription.availableFiles()
        self.dummySubscription.failFiles(S)

        assert len(self.dummySubscription.availableFiles()) == 0, \
            "failed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        # Second test - explicitly passed files end up in the failed set.
        dummyFileList = self._makeRandomFiles()
        # Make the new files available, then fail them.
        self.dummySubscription.available.addFile(dummyFileList)
        self.dummySubscription.failFiles(files=dummyFileList)
        # Nothing may be left available - everything should be failed.
        assert len(self.dummySubscription.availableFiles()) == 0, \
            "failed subscription still has %s files, what's up with that?" % \
            len(self.dummySubscription.availableFiles())

        for x in dummyFileList:
            assert x in self.dummySubscription.failed.getFiles(type='set'), \
                "Couldn't make file failed %s" % x.dict['lfn']

        # Third test - failing a file removes it from the other sets.
        dummyFile3 = File('/tmp/dummyfile3,5555', 1, 1, 1)
        dummyFileList = [dummyFile3]

        # Seed every other set with dummyFile3 before failing it.
        self.dummySubscription.acquired.addFile(dummyFile3)
        self.dummySubscription.available.addFile(dummyFile3)
        self.dummySubscription.completed.addFile(dummyFile3)

        self.dummySubscription.failFiles(files=dummyFileList)

        assert dummyFile3 in self.dummySubscription.failed.getFiles(type='set'), \
            "Replicated file could'nt be inserted at failed Set"

        assert dummyFile3 not in self.dummySubscription.acquired.getFiles(type='set'), \
            'Failed file still present at acquired Set'
        assert dummyFile3 not in self.dummySubscription.completed.getFiles(type='set'), \
            'Failed file still present at completed Set'
        assert dummyFile3 not in self.dummySubscription.available.getFiles(type='set'), \
            'Failed file still present at available Set'

    def testFilesOfStatus(self):
        """
        Testcase for the filesOfStatus method of the Subscription class.
        """
        # Available files are those in the available fileset that are in none
        # of the other states.  The union must be parenthesised: `-` binds
        # tighter than `|`, so without the parentheses the expression would
        # be ((available - acquired) | completed | failed).
        assert self.dummySubscription.filesOfStatus('Available') == \
            self.dummySubscription.available.getFiles(type='set') - \
            (self.dummySubscription.acquiredFiles() |
             self.dummySubscription.completedFiles() |
             self.dummySubscription.failedFiles()), \
            "Method fileOfStatus('AvailableFiles') does not return available files set"
        assert self.dummySubscription.filesOfStatus('Acquired') == \
            self.dummySubscription.acquired.getFiles(type='set'), \
            "Method fileOfStatus('AcquiredFiles') does not return acquired files set"
        assert self.dummySubscription.filesOfStatus('Completed') == \
            self.dummySubscription.completed.getFiles(type='set'), \
            "Method fileOfStatus('CompletedFiles') does not return completed files set"
        assert self.dummySubscription.filesOfStatus('Failed') == \
            self.dummySubscription.failed.getFiles(type='set'), \
            "Method fileOfStatus('FailedFiles') does not return failed files set"

    def testAvailableFiles(self):
        """
        Testcase for the availableFiles method of the Subscription class.
        """
        assert self.dummySubscription.availableFiles() == \
            self.dummySubscription.available.getFiles(type='set'), \
            'Method availableFiles does not return available files Set'

    def testAcquiredFiles(self):
        """
        Testcase for the acquiredFiles method of the Subscription class.
        """
        assert self.dummySubscription.acquiredFiles() == \
            self.dummySubscription.acquired.getFiles(type='set'), \
            'Method acquiredFiles does not return acquired files Set'

    def testCompletedFiles(self):
        """
        Testcase for the completedFiles method of the Subscription class.
        """
        assert self.dummySubscription.completedFiles() == \
            self.dummySubscription.completed.getFiles(type='set'), \
            'Method completedFiles does not return completed files Set'

    def testFailedFiles(self):
        """
        Testcase for the failedFiles method of the Subscription class.
        """
        assert self.dummySubscription.failedFiles() == \
            self.dummySubscription.failed.getFiles(type='set'), \
            'Method failedFiles does not return failed files Set'
Esempio n. 35
0
class EndOfRunBasedTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Create four filesets, each wrapped in an "EndOfRun" Processing
        subscription sharing one workflow:

        - multipleFileFileset: ten files, file i holding run i, lumi 45+i
        - singleFileFileset: one file holding run 1, lumi 45
        - multipleFileLumiset: ten files in run 1, three files per lumi
          (lumis 45, 45, 45, 46, ..., 48)
        - singleLumiFileset: ten files, all in run 1, lumi 45
        """
        def makeFile(lfn, run, lumi):
            # Every test file has the same size, event count and location;
            # a fresh locations set is built for each file.
            newFile = File(lfn, size=1000, events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(run, lumi))
            return newFile

        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            self.multipleFileFileset.addFile(makeFile(makeUUID(), i, 45 + i))

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.addFile(makeFile("/some/file/name", 1, 45))

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            # Floor division keeps the lumi number an integer on Python 3 as
            # well; a plain `/` would produce float lumis there.
            self.multipleFileLumiset.addFile(
                makeFile(makeUUID(), 1, 45 + i // 3))

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            self.singleLumiFileset.addFile(makeFile(makeUUID(), 1, 45))

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="EndOfRun",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="EndOfRun",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="EndOfRun",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="EndOfRun",
            type="Processing")

        return

    def tearDown(self):
        """Nothing to clean up after a test."""

    def testNone(self):
        """
        _testNone_

        Since the filesets of all subscriptions are still open, the splitter
        is expected to return no job groups for any of them.
        """
        splitter = SplitterFactory()
        for subscription in (self.singleFileSubscription,
                             self.multipleFileSubscription,
                             self.multipleLumiSubscription,
                             self.singleLumiSubscription):
            jobFactory = splitter(subscription)
            jobGroups = jobFactory()
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in recent unittest versions.
            self.assertEqual(jobGroups, [], "Should have returned a null set")

        return
    
    def testClosed(self):
        """
        _testClosed_

        Once a fileset is closed and none of its files have been acquired,
        the splitter is expected to return one job group holding a single
        job that contains every file of the subscription.
        """
        oneGroupMsg = "ERROR: JobFactory didn't return one JobGroup."
        oneJobMsg = "ERROR: JobFactory didn't create a single job."

        splitter = SplitterFactory()

        # Single-file subscription: the one job must hold the known lfn.
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        self.assertEqual(len(jobGroups), 1, oneGroupMsg)
        self.assertEqual(len(jobGroups[0].jobs), 1, oneJobMsg)

        job = jobGroups[0].jobs.pop()
        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"],
                         "ERROR: Job contains unknown files.")

        # The ten-file subscriptions: one job with all ten files each.
        for subscription in (self.multipleFileSubscription,
                             self.multipleLumiSubscription,
                             self.singleLumiSubscription):
            subscription.getFileset().markOpen(False)
            jobFactory = splitter(subscription)
            jobGroups = jobFactory()
            self.assertEqual(len(jobGroups), 1, oneGroupMsg)
            self.assertEqual(len(jobGroups[0].jobs), 1, oneJobMsg)
            myfiles = jobGroups[0].jobs[0].getFiles()
            self.assertEqual(len(myfiles), 10)
        
        
    def testAllAcquired(self):
        """
        _testAllAcquired_

        After every available file of a subscription has been acquired, the
        splitter is expected to return no job groups for it.
        """
        splitter = SplitterFactory()
        for subscription in (self.singleFileSubscription,
                             self.multipleFileSubscription,
                             self.multipleLumiSubscription,
                             self.singleLumiSubscription):
            subscription.acquireFiles(subscription.availableFiles())
            jobFactory = splitter(subscription)
            jobGroups = jobFactory()
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in recent unittest versions.
            self.assertEqual(jobGroups, [], "Should have returned a null set")
        
    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_

        Acquire one file from each subscription and run the splitter.  The
        single-file subscription is then expected to yield no job groups,
        while each ten-file subscription (whose fileset is closed first) is
        expected to yield one job group with one job holding the remaining
        nine files.
        """
        splitter = SplitterFactory()
        # NOTE(review): this closes the multiple-file fileset, not the
        # single-file one used right below (it is closed again further down);
        # possibly singleFileSubscription was intended - confirm.
        self.multipleFileSubscription.getFileset().markOpen(False)
        # Acquire the only file of the single-file subscription.
        self.singleFileSubscription.acquireFiles(
                           [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(jobGroups, [], "Should have returned a null set")
        
        # Ten files in ten different runs: acquire one, expect nine in a job.
        self.multipleFileSubscription.getFileset().markOpen(False)
        self.multipleFileSubscription.acquireFiles(
                           [self.multipleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        # Ten files in one run spread over several lumis: same expectation.
        self.multipleLumiSubscription.getFileset().markOpen(False)
        self.multipleLumiSubscription.acquireFiles(
                           [self.multipleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        # Ten files sharing one run and one lumi: same expectation.
        self.singleLumiSubscription.getFileset().markOpen(False)
        self.singleLumiSubscription.acquireFiles(
                           [self.singleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory()
        self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEquals(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEquals(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")
        
        # Repeats the check just above, without a failure message.
        self.assertEquals(len(myfiles), 9)