Example #1
    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        mergeFilesetA = Fileset(name="mergeFilesetA")
        mergeFilesetB = Fileset(name="mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bogus",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputFileA = File(lfn="inputLFNA")
        inputFileB = File(lfn="inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name="procWorkflowA",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name="procWorkflowB",
                                 spec="bunk3",
                                 owner="Steve",
                                 task="Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowA,
                                         split_algo="EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowB,
                                         split_algo="EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription=procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription=procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute(
            [testJobA, testJobB, testJobC, testJobD, testJobE, testJobF])

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn="fileD",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)
        jobGroupB.output.commit()

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mergeSubscriptionB)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=20000,
                            max_merge_events=7169)

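        # Jobs D and F in jobGroupB are still marked as failures, so the
        # merge over mergeFilesetB must hold off and create no jobs yet.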
        assert len(result) == 0, \
               "Error: No merge jobs should have been created."

        fileE = File(lfn="fileE",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn="fileF",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)
        jobGroupB.output.commit()

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size=1,
                            max_merge_size=20000,
                            max_merge_events=7169)

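        # With jobs D and F now successful, all four files in mergeFilesetB
        # (4 x 1024 bytes, 4 x 1024 events) fit under the thresholds and
        # should be swept into a single merge job.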
        assert len(result) == 1, \
               "Error: One merge job should have been created: %s" % len(result)

        return
Example #2
    def testGetOutputMapDAO(self):
        """
        _testGetOutputMapDAO_

        Verify the proper behavior of the GetOutputMapDAO for a variety of
        different processing chains.
        """
        recoOutputFileset = Fileset(name="RECO")
        recoOutputFileset.create()
        mergedRecoOutputFileset = Fileset(name="MergedRECO")
        mergedRecoOutputFileset.create()
        alcaOutputFileset = Fileset(name="ALCA")
        alcaOutputFileset.create()
        mergedAlcaOutputFileset = Fileset(name="MergedALCA")
        mergedAlcaOutputFileset.create()
        dqmOutputFileset = Fileset(name="DQM")
        dqmOutputFileset.create()
        mergedDqmOutputFileset = Fileset(name="MergedDQM")
        mergedDqmOutputFileset.create()
        cleanupFileset = Fileset(name="Cleanup")
        cleanupFileset.create()

        testWorkflow = Workflow(spec="wf001.xml",
                                owner="Steve",
                                name="TestWF",
                                task="None")
        testWorkflow.create()
        testWorkflow.addOutput("output", recoOutputFileset,
                               mergedRecoOutputFileset)
        testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset,
                               mergedAlcaOutputFileset)
        testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset)
        testWorkflow.addOutput("output", cleanupFileset)
        testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset)
        testWorkflow.addOutput("DQM", cleanupFileset)

        testRecoMergeWorkflow = Workflow(spec="wf002.xml",
                                         owner="Steve",
                                         name="TestRecoMergeWF",
                                         task="None")
        testRecoMergeWorkflow.create()
        testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset,
                                        mergedRecoOutputFileset)

        testRecoProcWorkflow = Workflow(spec="wf004.xml",
                                        owner="Steve",
                                        name="TestRecoProcWF",
                                        task="None")
        testRecoProcWorkflow.create()

        testAlcaChildWorkflow = Workflow(spec="wf003.xml",
                                         owner="Steve",
                                         name="TestAlcaChildWF",
                                         task="None")
        testAlcaChildWorkflow.create()

        inputFile = File(lfn="/path/to/some/lfn",
                         size=600000,
                         events=60000,
                         locations="cmssrm.fnal.gov")
        inputFile.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testFileset.addFile(inputFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventBased",
                                        type="Processing")

        testMergeRecoSubscription = Subscription(
            fileset=recoOutputFileset,
            workflow=testRecoMergeWorkflow,
            split_algo="WMBSMergeBySize",
            type="Merge")
        testProcRecoSubscription = Subscription(fileset=recoOutputFileset,
                                                workflow=testRecoProcWorkflow,
                                                split_algo="FileBased",
                                                type="Processing")

        testChildAlcaSubscription = Subscription(
            fileset=alcaOutputFileset,
            workflow=testAlcaChildWorkflow,
            split_algo="FileBased",
            type="Processing")
        testSubscription.create()
        testMergeRecoSubscription.create()
        testProcRecoSubscription.create()
        testChildAlcaSubscription.create()
        testSubscription.acquireFiles()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job(name="SplitJobA", files=[inputFile])
        testJob.create(group=testJobGroup)
        testJob["state"] = "complete"
        testJob.save()

        outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap")
        outputMap = outputMapAction.execute(jobID=testJob["id"])

        assert len(outputMap.keys()) == 3, \
               "Error: Wrong number of outputs for primary workflow."

        goldenMap = {
            "output": (recoOutputFileset.id, mergedRecoOutputFileset.id),
            "ALCARECOStreamCombined":
            (alcaOutputFileset.id, mergedAlcaOutputFileset.id),
            "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id)
        }

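        # outputMap maps each output module name to its (unmerged fileset id,
        # merged fileset id) pair; entries whose merged_output_fileset is None
        # belong to the cleanup fileset attached to every output module.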
        for outputID in outputMap.keys():
            for outputFilesets in outputMap[outputID]:
                if outputFilesets["merged_output_fileset"] is None:
                    self.assertEqual(outputFilesets["output_fileset"],
                                     cleanupFileset.id,
                                     "Error: Cleanup fileset is wrong.")
                    continue

                self.assertTrue(outputID in goldenMap.keys(),
                                "Error: Output identifier is missing.")
                self.assertEqual(outputFilesets["output_fileset"],
                                 goldenMap[outputID][0],
                                 "Error: Output fileset is wrong.")
                self.assertEqual(outputFilesets["merged_output_fileset"],
                                 goldenMap[outputID][1],
                                 "Error: Merged output fileset is wrong.")
                del goldenMap[outputID]

        self.assertEqual(len(goldenMap.keys()), 0,
                         "Error: Missing output maps.")

        return
Example #3
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnn
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 1,
                                            'STREAM' : "Express",
                                            'TIME' : int(time.time()),
                                            'LFN' : "/streamer",
                                            'FILESIZE' : 0,
                                            'EVENTS' : 0 },
                                  transaction = False)

        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute( { 'RUN' : 1,
                                              'STREAM' : "Express" },
                                            transaction = False)

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Condition",
                                           type = "Condition")
        self.subscription1.create()

        # Set up the parentage chain (streamer -> alcareco -> alcaprompt -> sqlite)
        # and the sqlite fileset
        alcaRecoFile = File("/alcareco", size = 0, events = 0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomePNN", immediateSave = False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size = 0, events = 0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomePNN", immediateSave = False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size = 0, events = 0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details
                                              """,
                                           transaction = False)[0].fetchall()

        setParentageDAO = wmbsDaoFactory(classname = "Files.SetParentage")
        setParentageDAO.execute(binds = [ { 'parent' : "/streamer",
                                            'child' : "/alcareco" },
                                          { 'parent' : "/alcareco",
                                            'child' : "/alcaprompt" },
                                          { 'parent' : "/alcaprompt",
                                            'child' : "/sqlite" } ],
                                transaction = False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return
Example #4
    def test_AutoIncrementCheck(self):
        """
        _AutoIncrementCheck_

        Test and see whether we can find and set the auto_increment values
        """
        myThread = threading.currentThread()
        if myThread.dialect.lower() != 'mysql':
            return

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 1)

        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 2)

        incrementDAO.execute(input=10)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 11)

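        # Requesting a lower value (5) must not rewind the counter: the next
        # job created should still get id 12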
        incrementDAO.execute(input=5)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 12)

        return
Example #5
    def testFailJobInput(self):
        """
        _testFailJobInput_

        Test the Jobs.FailInput DAO and verify that it doesn't affect other
        jobs/subscriptions that run over the same files.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="setest.site.ch")
        testFileB = File(lfn=makeUUID(), locations="setest.site.ch")
        testFileC = File(lfn=makeUUID(), locations="setest.site.ch")
        testFileA.create()
        testFileB.create()
        testFileC.create()

        testFileset.addFile([testFileA, testFileB, testFileC])
        bogusFileset.addFile([testFileA, testFileB, testFileC])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.completeFiles([testFileA, testFileB, testFileC])
        bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA",
                       files=[testFileA, testFileB, testFileC])
        testJobB = Job(name="TestJobB",
                       files=[testFileA, testFileB, testFileC])

        bogusJob = Job(name="BogusJob",
                       files=[testFileA, testFileB, testFileC])

        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)

        bogusJob.create(group=bogusJobGroup)

        testJobA.failInputFiles()
        testJobB.failInputFiles()

        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        testJobB["state"] = "cleanout"
        changeStateAction.execute([testJobB])

        # Try again, now that testJobB has moved to the cleanout state

        testJobA.failInputFiles()

        # The files should remain marked as failed
        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

        # bogus should be unchanged
        self.assertEqual(len(bogusSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Acquired")), 3)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Failed")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Completed")), 0)

        return
Example #6
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event-based splitting algorithm.  All available files are split into
        jobs of a set number of events each.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')
                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(
                    collectionName, filesetName, owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return

        totalJobs = 0

        locationDict = self.sortByLocation()
        for location in locationDict:
            self.newGroup()
            fileList = locationDict[location]
            getRunLumiInformation = False
            for f in fileList:
                if f['lfn'].startswith("MCFakeFile"):
                    # At least one MCFakeFile, so run information is needed
                    getRunLumiInformation = True
                    break
            if getRunLumiInformation:
                if self.package == 'WMCore.WMBS':
                    loadRunLumi = self.daoFactory(
                        classname="Files.GetBulkRunLumi")
                    fileLumis = loadRunLumi.execute(files=fileList)
                    for f in fileList:
                        lumiDict = fileLumis.get(f['id'], {})
                        for run in lumiDict.keys():
                            f.addRun(run=Run(run, *lumiDict[run]))

            for f in fileList:
                currentEvent = f['first_event']
                eventsInFile = f['events']
                runs = list(f['runs'])
                #We got the runs, clean the file.
                f['runs'] = set()

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if acdcFileList:
                    if f['lfn'] in [x['lfn'] for x in acdcFileList]:
                        totalJobs = self.createACDCJobs(
                            f, acdcFileList, timePerEvent, sizePerEvent,
                            memoryRequirement, lheInput, eventsPerJob,
                            eventsPerLumi, totalJobs)
                    continue
                elif not f['lfn'].startswith("MCFakeFile"):
                    # Very uncommon, but this file has a real input dataset
                    if eventsInFile >= eventsPerJob:
                        while currentEvent < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            if eventsPerJob + currentEvent < eventsInFile:
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(
                                    eventsPerJob, currentEvent)
                            else:
                                jobTime = (eventsInFile -
                                           currentEvent) * timePerEvent
                                diskRequired = (eventsInFile -
                                                currentEvent) * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(
                                    None, currentEvent)
                            self.currentJob.addResourceEstimates(
                                jobTime=jobTime,
                                memory=memoryRequirement,
                                disk=diskRequired)
                            logging.debug(
                                "Job created for real input with %s" %
                                self.currentJob)
                            currentEvent += eventsPerJob
                            totalJobs += 1
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(
                            jobTime=jobTime,
                            memory=memoryRequirement,
                            disk=diskRequired)
                        logging.debug(
                            "Last job created for real input with %s" %
                            self.currentJob)
                        totalJobs += 1
                else:
                    #This assumes there's only one run which is the case for MC
                    lumis = runs[0].lumis
                    (firstLumi, lastLumi) = (min(lumis), max(lumis))
                    currentLumi = firstLumi
                    totalEvents = 0
                    if eventsInFile >= eventsPerJob:
                        while totalEvents < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            self.currentJob.addBaggageParameter(
                                "lheInputFiles", lheInput)
                            lumisPerJob = int(
                                ceil(float(eventsPerJob) / eventsPerLumi))
                            # Limit the number of events to an unsigned 32-bit int
                            eventsRemaining = eventsInFile - totalEvents
                            if (currentEvent + eventsPerJob -
                                    1) > (2**32 - 1) and (currentEvent +
                                                          eventsRemaining -
                                                          1) > (2**32 - 1):
                                currentEvent = 1
                            if eventsRemaining > eventsPerJob:
                                self.currentJob["mask"].setMaxAndSkipEvents(
                                    eventsPerJob, currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(
                                    lumisPerJob, currentLumi)
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                            else:
                                jobTime = eventsRemaining * timePerEvent
                                diskRequired = eventsRemaining * sizePerEvent
                                lumisPerJob = int(
                                    ceil(
                                        float(eventsRemaining) /
                                        eventsPerLumi))
                                self.currentJob["mask"].setMaxAndSkipEvents(
                                    eventsRemaining, currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(
                                    lumisPerJob, currentLumi)
                            currentLumi += lumisPerJob
                            currentEvent += eventsPerJob
                            totalEvents += eventsPerJob
                            totalJobs += 1
                            self.currentJob.addResourceEstimates(
                                jobTime=jobTime,
                                memory=memoryRequirement,
                                disk=diskRequired)
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        #For MC we use firstEvent instead of skipEvents so set it to 1
                        #We must check for events going over 2**32 - 1 here too
                        if (eventsInFile + currentEvent - 1) > (2**32 - 1):
                            currentEvent = 1
                        self.currentJob["mask"].setMaxAndSkipEvents(
                            eventsInFile, currentEvent)
                        self.currentJob["mask"].setMaxAndSkipLumis(
                            lastLumi - currentLumi + 1, currentLumi)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(
                            jobTime=jobTime,
                            memory=memoryRequirement,
                            disk=diskRequired)
                        totalJobs += 1
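The real-input branch above boils down to simple event chunking: a file with
eventsInFile events is cut into jobs of eventsPerJob events each, and every job
carries a (skip, max) mask, with max left as None on the final job so it reads
to the end of the file. A minimal standalone sketch of that arithmetic (plain
Python, independent of the WMBS job and mask classes; names are illustrative):

def chunk_events(events_in_file, events_per_job, first_event=0):
    """Yield (skip_events, max_events) mask pairs for a single file.

    max_events is None for the final chunk, mirroring the
    setMaxAndSkipEvents(None, currentEvent) call in the algorithm above.
    """
    current = first_event
    while current < events_in_file:
        if events_per_job + current < events_in_file:
            yield (current, events_per_job)
        else:
            yield (current, None)  # last job reads to the end of the file
        current += events_per_job

# A 1024-event file split at 250 events per job yields five masks:
# (0, 250), (250, 250), (500, 250), (750, 250), (1000, None)
print(list(chunk_events(1024, 250)))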
Example #7
    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert a merge fileset and a bogus fileset, a merge workflow, and
        ParentlessMergeBySize subscriptions into WMBS to test job creation.
        Eleven files spread over two runs are added to both filesets; two of
        the files are deliberately oversized in either events or size.
        """
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "s1", seName = "somese.cern.ch")

        changeStateDAO = self.daoFactory(classname = "Jobs.ChangeState")

        mergeFileset = Fileset(name = "mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name = "bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec = workflowURL, owner = "mnorman",
                                 name = name, task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset = mergeFileset,
                                         workflow = mergeWorkflow,
                                         split_algo = "ParentlessMergeBySize")
        mergeSubscription.create()
        bogusSubscription = Subscription(fileset = bogusFileset,
                                         workflow = mergeWorkflow,
                                         split_algo = "ParentlessMergeBySize")
        bogusSubscription.create()

        file1 = File(lfn = "file1", size = 1024, events = 1024, first_event = 0,
                     locations = set(["somese.cern.ch"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn = "file2", size = 1024, events = 1024,
                     first_event = 1024, locations = set(["somese.cern.ch"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn = "file3", size = 1024, events = 1024,
                     first_event = 2048, locations = set(["somese.cern.ch"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn = "file4", size = 1024, events = 1024,
                     first_event = 3072, locations = set(["somese.cern.ch"]))
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn = "fileA", size = 1024, events = 1024,
                     first_event = 0, locations = set(["somese.cern.ch"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn = "fileB", size = 1024, events = 1024,
                     first_event = 1024, locations = set(["somese.cern.ch"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn = "fileC", size = 1024, events = 1024,
                     first_event = 2048, locations = set(["somese.cern.ch"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn = "fileI", size = 1024, events = 1024,
                     first_event = 0, locations = set(["somese.cern.ch"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn = "fileII", size = 1024, events = 1024,
                      first_event = 1024, locations = set(["somese.cern.ch"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn = "fileIII", size = 1024, events = 102400,
                       first_event = 2048, locations = set(["somese.cern.ch"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn = "fileIV", size = 102400, events = 1024,
                      first_event = 3072, locations = set(["somese.cern.ch"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
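        # fileIII (102400 events) and fileIV (102400 bytes) are deliberately
        # oversized to exercise the event and size merge thresholds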

        for file in [file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                     fileII, fileIII, fileIV]:
            mergeFileset.addFile(file)
            bogusFileset.addFile(file)

        mergeFileset.commit()
        bogusFileset.commit()

        return
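Tests built on this fixture then run the ParentlessMergeBySize splitter over
mergeSubscription, following the same SplitterFactory pattern as Example #1. A
sketch of a typical invocation (the threshold values below are illustrative,
not taken from any particular test):

splitter = SplitterFactory()
jobFactory = splitter(package="WMCore.WMBS", subscription=mergeSubscription)
result = jobFactory(min_merge_size=1,
                    max_merge_size=20000,
                    max_merge_events=8192)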
Example #8
    def __call__(self, filesets):
        """
        The algorithm itself
        """
        # Update run list
        self.getNewRuns()

        # Do per fileset work, abandon fileset processing on exception
        for fileset in filesets:
            ds = fileset.name
            try:
                # Do per run work
                watchCompleteFiles = []

                for watch in self.watchedRuns:
                    # Ensure watcher has dataset listed
                    watch.addDatasetOfInterest(ds)

                    # Query DBS to find all blocks for this run / dataset
                    (files, blocks, fileInfoMap) = \
                        self.dbsHelper.getFileInfo(watch.run, ds)

                    # Now determine all required parent blocks
                    parentBlocks = set()
                    if fileset.requireParents:
                        parentDs = self.dbsHelper.getParentDataset(ds)
                        parentBlocks = self.dbsHelper.getBlockInfo(
                            watch.run, parentDs)

                    # Final set of all required blocks (data plus parents)
                    allBlocks = set(blocks)
                    allBlocks.update(parentBlocks)

                    # Find all sites where all required blocks (including
                    # parents) are complete
                    sites = self.phedexHelper.getCompleteSites(allBlocks)

                    # Get sites with newly completed transfers
                    newSites = watch.getNewSites(ds, sites)

                    if len(newSites) > 0:
                        # Add the files for these blocks to the fileset
                        for file in fileInfoMap:
                            fi = fileInfoMap[file]

                            # First add parent file
                            if fileset.requireParents:
                                parentFile = File(lfn=fi["file.parent"])
                                parentFile.save()
                                parentFile.setLocation(newSites)

                            # Add actual file
                            fileToAdd = File(lfn=file,
                                             size=fi["file.size"],
                                             events=fi["file.events"],
                                             run=watch.run,
                                             lumi=fi["file.lumi"])
                            if not fileToAdd.exists() and fileset.requireParents:
                                fileToAdd.addParent(fi["file.parent"])

                            # Add new locations but don't persist immediately
                            fileToAdd.setLocations(newSites,
                                                   immediateSave=False)

                            # Add the file to the new file list
                            fileset.addFile(fileToAdd)

                    # Add the site info to the watcher list
                    watchCompleteFiles.append([watch, ds, newSites])

                # Commit the fileset
                fileset.commit()

                # Add the watched runs
                for a in watchCompleteFiles:
                    a[0].addCompletedNodes(a[1], a[2])

            except Exception:
                # Reset the watch list so we re-evaluate next call
                watchCompleteFiles = []

        # Purge old runs
        self.purgeWatchedRuns()
Example #9
    def verifyFileMetaData(self, jobID, fwkJobReportFiles):
        """
        _verifyFileMetaData_

        Verify that all the files that were output by a job made it into WMBS
        correctly.  Compare the contents of WMBS to the files in the framework
        job report.

        Note that fwkJobReportFiles is a list of DataStructs File objects.
        """
        testJob = Job(id=jobID)
        testJob.loadData()

        inputLFNs = []
        for inputFile in testJob["input_files"]:
            inputLFNs.append(inputFile["lfn"])

        for fwkJobReportFile in fwkJobReportFiles:
            outputFile = File(lfn=fwkJobReportFile["lfn"])
            outputFile.loadData(parentage=1)

            assert outputFile["events"] == int(fwkJobReportFile["events"]), \
                   "Error: Output file has wrong events: %s, %s" % \
                   (outputFile["events"], fwkJobReportFile["events"])
            assert outputFile["size"] == int(fwkJobReportFile["size"]), \
                   "Error: Output file has wrong size: %s, %s" % \
                   (outputFile["size"], fwkJobReportFile["size"])

            for ckType in fwkJobReportFile["checksums"].keys():
                assert ckType in outputFile["checksums"].keys(), \
                       "Error: Output file is missing checksums: %s" % ckType
                assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \
                       "Error: Checksums don't match."

            assert len(fwkJobReportFile["checksums"].keys()) == \
                   len(outputFile["checksums"].keys()), \
                   "Error: Wrong number of checksums."

            jobType = self.getJobTypeAction.execute(jobID=jobID)
            if jobType == "Merge":
                assert str(outputFile["merged"]) == "True", \
                       "Error: Merge jobs should output merged files."
            else:
                assert outputFile["merged"] == fwkJobReportFile["merged"], \
                       "Error: Output file merged output is wrong: %s, %s" % \
                       (outputFile["merged"], fwkJobReportFile["merged"])

            assert len(outputFile["locations"]) == 1, \
                   "Error: outputfile should have one location: %s" % outputFile["locations"]
            assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \
                   "Error: wrong location for file."

            assert len(outputFile["parents"]) == len(inputLFNs), \
                   "Error: Output file has wrong number of parents."
            for outputParent in outputFile["parents"]:
                assert outputParent["lfn"] in inputLFNs, \
                       "Error: Unknown parent file: %s" % outputParent["lfn"]

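            # Diff strategy: build a run -> lumis map from the FWJR, tick off
            # every lumi found in WMBS, and require the map to be empty at the
            # end; anything left over is missing, anything unmatched is extra.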
            fwjrRuns = {}
            for run in fwkJobReportFile["runs"]:
                fwjrRuns[run.run] = run.lumis

            for run in outputFile["runs"]:
                assert run.run in fwjrRuns, \
                       "Error: Extra run in output: %s" % run.run

                for lumi in run:
                    assert lumi in fwjrRuns[run.run], \
                           "Error: Extra lumi: %s" % lumi

                    fwjrRuns[run.run].remove(lumi)

                if len(fwjrRuns[run.run]) == 0:
                    del fwjrRuns[run.run]

            assert len(fwjrRuns.keys()) == 0, \
                   "Error: Missing runs, lumis: %s" % fwjrRuns

            testJobGroup = JobGroup(id=testJob["jobgroup"])
            testJobGroup.loadData()
            jobGroupFileset = testJobGroup.output
            jobGroupFileset.loadData()

            assert outputFile["id"] in jobGroupFileset.getFiles(type = "id"), \
                   "Error: output file not in jobgroup fileset."

            if testJob["mask"]["FirstEvent"] == None:
                assert outputFile["first_event"] == 0, \
                       "Error: first event not set correctly: 0, %s" % \
                       outputFile["first_event"]
            else:
                assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \
                       "Error: last event not set correctly: %s, %s" % \
                       (testJob["mask"]["FirstEvent"], outputFile["first_event"])

        return
Example #10
    def testLumiMask(self):
        """
        _testLumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("T1_US_FNAL_Disk")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("T1_US_FNAL_Disk")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

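        # The mask keeps 5 + 2 + 2 = 9 lumis at 100 events each (900 events),
        # so with events_per_job=850 the work splits into exactly two jobs.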
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        processedLumis = LumiList()
        for job in jobs:
            processedLumis += LumiList(
                compactList=job['mask'].getRunAndLumis())
        correctLumis = LumiList(compactList={
            1: [[10, 14]],
            2: [[20, 21]],
            4: [[40, 41]]
        })
        self.assertEqual(processedLumis.getCMSSWString(),
                         correctLumis.getCMSSWString())
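The final assertion works because LumiList supports union via + (the += above)
and canonical string output via getCMSSWString(). A small sketch of just that
equivalence, assuming the same WMCore.DataStructs.LumiList class this test
relies on:

from WMCore.DataStructs.LumiList import LumiList

# Two per-job masks that together cover the full lumi-mask above
maskJob1 = LumiList(compactList={1: [[10, 14]], 2: [[20, 21]]})
maskJob2 = LumiList(compactList={4: [[40, 41]]})

combined = maskJob1 + maskJob2
full = LumiList(compactList={1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]})
assert combined.getCMSSWString() == full.getCMSSWString()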
Example #11
    def testLumiCorrections(self):
        """
        _testLumiCorrections_

        Test that the splitting algorithm can handle lumis which cross
        multiple files, without needing applyLumiCorrection=True.
        """

        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles=2,
                                                   lumisPerFile=2,
                                                   twoSites=False,
                                                   nEventsPerFile=150)
        files = testSubscription.getFileset().getFiles()
        self.assertEqual(len(files), 2)

        # Two files with 2 lumis each: file0 has run0 and lumis 0,1 - file1 has run1 and lumis 2,3 - each 150 events
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(events_per_job=50,
                               halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               performance=self.performanceParams)

        # The splitting algorithm will assume 75 events per lumi so we will have one job per lumi
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)

        # Recreate the same subscription as before, but with duplicated lumis
        testFileset = Fileset(name='FilesetA')
        fileA = File(lfn="/this/is/file1", size=1000, events=150)
        lumiListA = [0, 1, 42]
        fileA.addRun(Run(0, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")
        testFileset.addFile(fileA)

        fileB = File(lfn="/this/is/file2", size=1000, events=150)
        lumiListB = [2, 3, 42]
        fileB.addRun(Run(0, *lumiListB))
        fileB.setLocation("T1_US_FNAL_Disk")
        testFileset.addFile(fileB)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=50,
                               halt_job_on_file_boundaries=True,
                               performance=self.performanceParams)

        # Now we will have: file0: Run0 and lumis [0, 1, 42] file1: Run0 and lumis [2, 3, 42]
        # With 50 events per lumi, one job per lumi, one will have two files on lumi 42
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

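        # Five distinct lumis ({0, 1, 42} | {2, 3, 42}) means five jobs; the
        # job for lumi 42 is the only one that spans both input files.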
        n1files = 0
        n2files = 0
        lumi1files = []
        lumi2files = []

        for job in jobs:
            runLumis = job['mask'].getRunAndLumis()
            lumis = runLumis[0]
            self.assertEqual(len(runLumis), 1)
            self.assertEqual(len(lumis), 1)
            self.assertEqual(lumis[0][0],
                             lumis[0][1])  # Make sure only one lumi per job
            if len(job['input_files']) == 1:
                n1files += 1
                lumi1files.append(lumis[0][0])
            elif len(job['input_files']) == 2:
                n2files += 1
                lumi2files.append(lumis[0][0])
            else:
                self.fail("At least one job has nFiles != 1 or 2")

        self.assertEqual(n1files, 4)
        self.assertEqual(n2files, 1)
        self.assertItemsEqual(lumi1files, [0, 1, 2, 3])
        self.assertItemsEqual(lumi2files, [42])
Example #12
    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # First test: target 360 events per job. While the fileset is still
        # open the algorithm waits for more data, so no job groups come out yet
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 0)

        # Since there are not enough events to be split, close the fileset to
        # get the work going
        fileset = testSubscription.getFileset()
        fileset.markOpen(False)

        # One job in one job group with 100 files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2,
                                    "T1_US_FNAL_Disk")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1,
                                    "T1_US_FNAL_Disk")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1,
                                    "T1_US_FNAL_Disk")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1,
                                    "T1_US_FNAL_Disk")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files: one with 3 runs and a second containing only the last run of the first
        fileA = File(lfn="/this/is/file10", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListB))
        fileA.addRun(Run(3, *lumiListC))
        fileA.setLocation("T2_CH_CERN")

        fileB = self.createFile('/this/is/file11', 200, 3, 5, "T2_CH_CERN")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)
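
For reference, a minimal sketch (reusing only the jobGroups/job-mask API already
exercised above; the loop and print formatting are illustrative) of how the
per-job run and lumi ranges produced by a splitter can be inspected:

    for jobGroup in jobGroups:
        for job in jobGroup.jobs:
            # getRunAndLumis() maps run number -> list of [firstLumi, lastLumi] ranges
            for run, lumiRanges in job['mask'].getRunAndLumis().items():
                print("run %s -> lumis %s" % (run, lumiRanges))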
Example #13
    def _createThisSubscription(self, initialCounter=1):
        """
        Private function to create a fileset and subscription with
        different fileset and file names

        :param initialCounter: just a simple integer to be appended to files
        :return: an splitter instance (jobFactory)
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        testFileset = Fileset(name='Fileset%s' % initialCounter)

        fileA = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=800)
        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")

        initialCounter += 1
        fileB = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=400)
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("T1_US_FNAL_Disk")

        initialCounter += 1
        fileC = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=500)
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("T1_US_FNAL_Disk")

        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        return jobFactory
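
A typical use of the returned jobFactory, mirroring the splitter calls made in
the tests above (the parameter values here are illustrative, not prescriptive):

    jobFactory = self._createThisSubscription(initialCounter=1)
    jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                           splitOnRun=False,
                           events_per_job=200,
                           performance=self.performanceParams)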
Example #14
    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = [
            'FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi',
            'LastRun'
        ]
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                msg = 'Invalid value "%s" for %s' % (self.mask.get(key), key)
                raise WorkQueueWMBSException(msg)
        locations = set()
        for site in self.getLocations.execute(
                conn=self.getDBConn(), transaction=self.existingTransaction()):
            try:
                siteInfo = self.getLocationInfo.execute(
                    site,
                    conn=self.getDBConn(),
                    transaction=self.existingTransaction())
                if not siteInfo:
                    self.logger.info(
                        'Skipping MonteCarlo injection to site "%s" as unknown to wmbs'
                        % site)
                    continue
                locations.add(siteInfo[0]['pnn'])
            except Exception as ex:
                self.logger.error(
                    'Error getting storage element for "%s": %s' %
                    (site, str(ex)))
        if not locations:
            msg = 'No locations to inject Monte Carlo work to, unable to proceed'
            raise WorkQueueWMBSException(msg)
        mcFakeFileName = ("MCFakeFile-%s" % self.topLevelFileset.name).encode(
            'ascii', 'ignore')
        wmbsFile = File(
            lfn=mcFakeFileName,
            first_event=self.mask['FirstEvent'],
            last_event=self.mask['LastEvent'],
            events=self.mask['LastEvent'] - self.mask['FirstEvent'] +
            1,  # inclusive range
            locations=locations,
            merged=False,  # merged causes dbs parentage relation
        )

        if self.mask:
            lumis = range(self.mask['FirstLumi'],
                          self.mask['LastLumi'] + 1)  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'],
                                *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS File: %s on Location: %s", wmbsFile['lfn'],
                     wmbsFile['newlocations'])

        self.wmbsFilesToCreate.append(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles
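
For illustration, a hypothetical mask that satisfies the 'needed' keys checked
above, with the inclusive-range arithmetic the method applies spelled out:

    mask = {'FirstRun': 1, 'LastRun': 1,
            'FirstLumi': 1, 'LastLumi': 10,
            'FirstEvent': 1, 'LastEvent': 1000}
    # events covered = LastEvent - FirstEvent + 1 = 1000  (inclusive range)
    # lumis covered  = range(FirstLumi, LastLumi + 1)     = lumis 1 through 10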
Example #15
    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #16
    def testReportHandling(self):
        """
        _testReportHandling_

        Verify that we're able to parse a CMSSW report, convert it to a Report()
        style report, pickle it and then have the accountant process it.
        """
        self.procPath = os.path.join(
            WMCore.WMBase.getTestBase(),
            "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml")

        myReport = Report("cmsRun1")
        myReport.parse(self.procPath)

        # Fake some metadata that should be added by the stageout scripts.
        for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"

        fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl")
        cmsRunStep = myReport.retrieveStep("cmsRun1")
        cmsRunStep.status = 0
        myReport.setTaskName('/TestWF/None')
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID=self.testJob["id"], fwjrPath=fwjrPath)

        pFile = DBSBufferFile(lfn="/path/to/some/lfn",
                              size=600000,
                              events=60000)
        pFile.setAlgorithm(appName="cmsRun",
                           appVer="UNKNOWN",
                           appFam="RECO",
                           psetHash="GIBBERISH",
                           configContent="MOREGIBBERISH")
        pFile.setDatasetPath("/bogus/dataset/path")
        #pFile.addRun(Run(1, *[45]))
        pFile.create()

        config = self.createConfig(workerThreads=1)
        accountant = JobAccountantPoller(config)
        accountant.setup()
        accountant.algorithm()

        self.verifyJobSuccess(self.testJob["id"])
        self.verifyFileMetaData(self.testJob["id"],
                                myReport.getAllFilesFromStep("cmsRun1"))

        inputFile = File(
            lfn=
            "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root"
        )
        inputFile.load()
        self.testMergeJob = Job(name="testMergeJob", files=[inputFile])
        self.testMergeJob.create(group=self.mergeJobGroup)
        self.testMergeJob["state"] = "complete"
        self.stateChangeAction.execute(jobs=[self.testMergeJob])

        self.mergePath = os.path.join(
            WMCore.WMBase.getTestBase(),
            "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml")

        myReport = Report("mergeReco")
        myReport.parse(self.mergePath)

        # Fake some metadata that should be added by the stageout scripts.
        for fileRef in myReport.getAllFileRefsFromStep("mergeReco"):
            fileRef.size = 1024
            fileRef.location = "cmssrm.fnal.gov"
            fileRef.dataset = {
                "applicationName": "cmsRun",
                "applicationVersion": "CMSSW_3_4_2_patch1",
                "primaryDataset": "MinimumBias",
                "processedDataset": "Rereco-v1",
                "dataTier": "RECO"
            }

        fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl")
        myReport.setTaskName('/MergeWF/None')
        cmsRunStep = myReport.retrieveStep("mergeReco")
        cmsRunStep.status = 0
        myReport.persist(fwjrPath)

        self.setFWJRAction.execute(jobID=self.testMergeJob["id"],
                                   fwjrPath=fwjrPath)
        accountant.algorithm()

        self.verifyJobSuccess(self.testMergeJob["id"])
        self.verifyFileMetaData(self.testMergeJob["id"],
                                myReport.getAllFilesFromStep("mergeReco"))

        return
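
To read such a pickled report back, a minimal sketch using Report.unpersist as
the read-side counterpart of persist(); treat the method name as an assumption
if your WMCore version differs:

    recovered = Report("cmsRun1")
    recovered.unpersist(fwjrPath)  # assumed counterpart of persist()
    print(recovered.getAllFilesFromStep("cmsRun1"))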
Example #17
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test event threshold (single job creation)

        Test that only closed lumis are used

        Test check on express release

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "Express",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : 0 } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        jobGroups = jobFactory(maxInputEvents = 200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(maxInputEvents = 200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.finalCloseLumis()

        jobGroups = jobFactory(maxInputEvents = 200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 200)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Express-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
Example #18
    def setUp(self):
        """
        _setUp_

        Setup the database and WMBS for the test.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "WMCore.WMBS"],
            useDefault=False)

        myThread = threading.currentThread()
        self.daofactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.dbsfactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        locationAction = self.daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="cmssrm.fnal.gov")

        inputFile = File(lfn="/path/to/some/lfn",
                         size=10,
                         events=10,
                         locations="cmssrm.fnal.gov")
        inputFile.create()

        inputFileset = Fileset(name="InputFileset")
        inputFileset.create()
        inputFileset.addFile(inputFile)
        inputFileset.commit()

        unmergedFileset = Fileset(name="UnmergedFileset")
        unmergedFileset.create()

        mergedFileset = Fileset(name="MergedFileset")
        mergedFileset.create()

        procWorkflow = Workflow(spec="wf001.xml",
                                owner="Steve",
                                name="TestWF",
                                task="/TestWF/None")
        procWorkflow.create()
        procWorkflow.addOutput("outputRECORECO", unmergedFileset)

        mergeWorkflow = Workflow(spec="wf002.xml",
                                 owner="Steve",
                                 name="MergeWF",
                                 task="/MergeWF/None")
        mergeWorkflow.create()
        mergeWorkflow.addOutput("Merged", mergedFileset)

        insertWorkflow = self.dbsfactory(classname="InsertWorkflow")
        insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0)
        insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0)

        self.procSubscription = Subscription(fileset=inputFileset,
                                             workflow=procWorkflow,
                                             split_algo="FileBased",
                                             type="Processing")
        self.procSubscription.create()
        self.procSubscription.acquireFiles()

        self.mergeSubscription = Subscription(fileset=unmergedFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize",
                                              type="Merge")
        self.mergeSubscription.create()

        self.procJobGroup = JobGroup(subscription=self.procSubscription)
        self.procJobGroup.create()
        self.mergeJobGroup = JobGroup(subscription=self.mergeSubscription)
        self.mergeJobGroup.create()

        self.testJob = Job(name="testJob", files=[inputFile])
        self.testJob.create(group=self.procJobGroup)
        self.testJob["state"] = "complete"

        self.stateChangeAction = self.daofactory(classname="Jobs.ChangeState")
        self.setFWJRAction = self.daofactory(classname="Jobs.SetFWJRPath")
        self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
        locationAction = self.daofactory(classname="Locations.New")
        locationAction.execute(siteName="cmssrm.fnal.gov")

        self.stateChangeAction.execute(jobs=[self.testJob])

        self.tempDir = tempfile.mkdtemp()
        return
Example #19
class WMBSHelper(WMConnectionBase):
    """
    _WMBSHelper_

    Interface between the WorkQueue and WMBS.
    """
    def __init__(self,
                 wmSpec,
                 taskName,
                 blockName=None,
                 mask=None,
                 cachepath='.'):
        """
        _init_

        Initialize DAOs and other things needed.
        """
        self.block = blockName
        self.mask = mask
        self.wmSpec = wmSpec
        self.topLevelTask = wmSpec.getTask(taskName)
        self.cachepath = cachepath
        self.isDBS = True

        self.topLevelFileset = None
        self.topLevelSubscription = None

        self.mergeOutputMapping = {}

        # Initiate the pieces you need to run your own DAOs
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        # DAOs from WMBS for file commit
        self.setParentage = self.daofactory(classname="Files.SetParentage")
        self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
        self.setFileLocation = self.daofactory(
            classname="Files.SetLocationForWorkQueue")
        self.setFileAddChecksum = self.daofactory(
            classname="Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname="Files.Add")
        self.addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
        self.getLocations = self.daofactory(classname="Locations.ListSites")
        self.getLocationInfo = self.daofactory(
            classname="Locations.GetSiteInfo")

        # DAOs from DBSBuffer for file commit
        self.dbsCreateFiles = self.dbsDaoFactory(
            classname="DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsInsertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")

        # Added for file creation bookkeeping
        self.dbsFilesToCreate = []
        self.addedLocations = []
        self.wmbsFilesToCreate = []
        self.insertedBogusDataset = -1

        return

    def createSandbox(self):
        """Create the runtime sandbox"""
        sandboxCreator = SandboxCreator()
        sandboxCreator.makeSandbox(self.cachepath, self.wmSpec)

    def createTopLevelFileset(self, topLevelFilesetName=None):
        """
        _createTopLevelFileset_

        Create the top level fileset for the workflow.  If the name of the top
        level fileset is not given create one.
        """
        if topLevelFilesetName is None:
            filesetName = (
                "%s-%s" %
                (self.wmSpec.name(), self.wmSpec.getTopLevelTask()[0].name()))
            if self.block:
                filesetName += "-%s" % self.block
            if self.mask:
                from hashlib import md5
                mask_string = ",".join(
                    ["%s=%s" % (x, self.mask[x]) for x in sorted(self.mask)])
                filesetName += "-%s" % md5(mask_string.encode("utf-8")).hexdigest()
        else:
            filesetName = topLevelFilesetName

        self.topLevelFileset = Fileset(filesetName)
        self.topLevelFileset.create()
        return

    def outputFilesetName(self, task, outputModuleName):
        """
        _outputFilesetName_

        Generate an output fileset name for the given task and output module.
        """
        if task.taskType() == "Merge":
            outputFilesetName = "%s/merged-%s" % (task.getPathName(),
                                                  outputModuleName)
        else:
            outputFilesetName = "%s/unmerged-%s" % (task.getPathName(),
                                                    outputModuleName)

        return outputFilesetName
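
    # For illustration (hypothetical task paths): a Merge task at
    # "/TestWF/MergeTask" with output module "Merged" would yield
    # "/TestWF/MergeTask/merged-Merged", while a Processing task at
    # "/TestWF/ProcTask" with module "output" would yield
    # "/TestWF/ProcTask/unmerged-output".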

    def createSubscription(self, task, fileset, alternativeFilesetClose=False):
        """
        _createSubscription_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        #FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose)
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s" %
                         subscription["id"])
        else:
            logging.info("Child subscription created: %s" % subscription["id"])

        outputModules = task.getOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                outputFileset = Fileset(
                    self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            mergedOutputFileset = Fileset(
                                self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset is not None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        self.createSubscription(childTask, outputFileset,
                                                alternativeFilesetClose)

                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription
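
    # Note: createSubscription recurses via childTaskIterator(), so one call on
    # the top-level task creates workflows, filesets and output mappings for the
    # whole task tree; only the first subscription created is kept as
    # self.topLevelSubscription.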

    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = [
            'FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi',
            'LastRun'
        ]
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                raise RuntimeError('Invalid value "%s" for %s' %
                                   (self.mask.get(key), key))
        locations = set()
        for site in self.getLocations.execute(
                conn=self.getDBConn(), transaction=self.existingTransaction()):
            try:
                siteInfo = self.getLocationInfo.execute(
                    site,
                    conn=self.getDBConn(),
                    transaction=self.existingTransaction())
                if not siteInfo:
                    self.logger.info(
                        'Skipping MonteCarlo injection to site "%s" as unknown to wmbs'
                        % site)
                    continue
                locations.add(siteInfo[0]['se_name'])
            except Exception as ex:
                self.logger.error(
                    'Error getting storage element for "%s": %s' %
                    (site, str(ex)))
        if not locations:
            raise RuntimeError(
                "No locations to inject Monte Carlo work to, unable to proceed")
        mcFakeFileName = "MCFakeFile-%s" % self.topLevelFileset.name
        wmbsFile = File(
            lfn=mcFakeFileName,
            first_event=self.mask['FirstEvent'],
            last_event=self.mask['LastEvent'],
            events=self.mask['LastEvent'] - self.mask['FirstEvent'] +
            1,  # inclusive range
            locations=locations,
            merged=False,  # merged causes dbs parentage relation
        )

        if self.mask:
            lumis = range(self.mask['FirstLumi'],
                          self.mask['LastLumi'] + 1)  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'],
                                *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS File: %s on Location: %s" %
                     (wmbsFile['lfn'], wmbsFile['newlocations']))

        self.wmbsFilesToCreate.append(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles
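
As a minimal sketch (with hypothetical workflow and task names) of the
fileset-name derivation performed by createTopLevelFileset above, including the
mask hash:

    from hashlib import md5

    mask = {'FirstRun': 1, 'LastRun': 1, 'FirstLumi': 1, 'LastLumi': 10}
    mask_string = ",".join(["%s=%s" % (x, mask[x]) for x in sorted(mask)])
    filesetName = "MyWorkflow-TaskA-%s" % md5(mask_string.encode("utf-8")).hexdigest()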
Example #20
    def __call__(self, filesetToProcess):
        """
        The algorithm itself
        """

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"),
                                      os.getenv("DIALECT"),
                                      os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        lastFileset = daofactory(classname="Fileset.ListFilesetByTask")
        lastWorkflow = daofactory(classname="Workflow.LoadFromTask")
        subsRun = daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
        successJob = daofactory(classname="Subscriptions.SucceededJobs")
        allJob = daofactory(classname="Subscriptions.Jobs")
        fileInFileset = daofactory(classname="Files.InFileset")

        # Get the start Run if asked
        startRun = (filesetToProcess.name).split(":")[3]
        logging.debug("the T0Feeder is processing %s" % \
                 filesetToProcess.name)
        logging.debug("the fileset name %s" % \
         (filesetToProcess.name).split(":")[0])

        fileType = (filesetToProcess.name).split(":")[2]
        crabTask = filesetToProcess.name.split(":")[0]
        LASTIME = filesetToProcess.lastUpdate

        tries = 1
        while True:

            try:

                myRequester = JSONRequests(url="vocms52.cern.ch:8889")
                requestResult = myRequester.get("/tier0/runs")

            except:

                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                else:
                    tries += 1
                    continue

            logging.debug("T0ASTRunChain feeder queries done ...")
            now = time.time()

            break

        for listRun in requestResult[0]:

            if startRun != 'None' and int(listRun['run']) >= int(startRun):
                if listRun['status'] in ('CloseOutExport', 'Complete',
                                         'CloseOutT1Skimming'):

                    crabWorkflow = lastWorkflow.execute(task=crabTask)

                    crabFileset = lastFileset.execute(task=crabTask)

                    crabrunFileset = Fileset(
                        name=crabFileset[0]["name"].split(':')[0].split('-Run')[0]
                        + '-Run' + str(listRun['run']) + ":"
                        + ":".join(crabFileset[0]['name'].split(':')[1:]))

                    if crabrunFileset.exists() > 0:

                        crabrunFileset.load()
                        currSubs = subsRun.execute(crabrunFileset.id,
                                                   crabWorkflow[0]['id'])

                        if currSubs:

                            listsuccessJob = successJob.execute(
                                subscription=currSubs['id'])
                            listallJob = allJob.execute(
                                subscription=currSubs['id'])

                            if len(listsuccessJob) == len(listallJob):

                                for currid in listsuccessJob:
                                    currjob = Job(id=currid)
                                    currjob.load()

                                    logging.debug("Reading FJR %s" %
                                                  currjob['fwjr_path'])

                                    jobReport = readJobReport(
                                        currjob['fwjr_path'])

                                    if len(jobReport) > 0:

                                        if jobReport[0].files:

                                            for newFile in jobReport[0].files:

                                                logging.debug("Output path %s" %
                                                              newFile['LFN'])
                                                newFileToAdd = File(
                                                    lfn=newFile['LFN'],
                                                    locations='caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(
                                                    filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:
                                                    filesetToProcess.addFile(
                                                        newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug(
                                                        "new file created/loaded and added by T0ASTRunChain...")

                                        elif jobReport[0].analysisFiles:

                                            for newFile in jobReport[0].analysisFiles:

                                                logging.debug("Output path %s" %
                                                              newFile['LFN'])
                                                newFileToAdd = File(
                                                    lfn=newFile['LFN'],
                                                    locations='caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(
                                                    filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:
                                                    logging.debug(
                                                        "%s loaded and added by T0ASTRunChain" %
                                                        newFile['LFN'])
                                                    filesetToProcess.addFile(
                                                        newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug(
                                                        "new file created/loaded and added by T0ASTRunChain...")

                                        else:
                                            break  # Missed fjr - try next time

        # Commit the fileset
        logging.debug("Test purge in T0ASTRunChain ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        # For a re-opened or empty fileset, try until the purge time
        if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:

            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")
Example #21
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
Example #22
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="somese.cern.ch")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="somese.cern.ch")
            newFile.addRun(Run(i, *[45 + i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations="somese.cern.ch")
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileRunset = Fileset(name="TestFileset3")
        self.multipleFileRunset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="somese.cern.ch")
            newFile.addRun(Run(i / 3, *[45]))
            newFile.create()
            self.multipleFileRunset.addFile(newFile)
        self.multipleFileRunset.commit()

        self.singleRunFileset = Fileset(name="TestFileset4")
        self.singleRunFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="somese.cern.ch")
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleRunFileset.addFile(newFile)
        self.singleRunFileset.commit()

        self.singleRunMultipleLumi = Fileset(name="TestFileset5")
        self.singleRunMultipleLumi.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="somese.cern.ch")
            newFile.addRun(Run(1, *[45 + i]))
            newFile.create()
            self.singleRunMultipleLumi.addFile(newFile)
        self.singleRunMultipleLumi.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="mnorman",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.multipleRunSubscription = Subscription(
            fileset=self.multipleFileRunset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunSubscription = Subscription(
            fileset=self.singleRunFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunMultipleLumiSubscription = Subscription(
            fileset=self.singleRunMultipleLumi,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleRunSubscription.create()
        self.singleRunSubscription.create()
        self.singleRunMultipleLumiSubscription.create()

        return
Example #23
    def testGetOutputParentLFNs(self):
        """
        _testGetOutputParentLFNs_

        Verify that the getOutputDBSParentLFNs() method returns the correct
        parent LFNs.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         merged=True)
        testFileB = File(lfn="/this/is/a/lfnB",
                         size=1024,
                         events=10,
                         merged=True)
        testFileC = File(lfn="/this/is/a/lfnC",
                         size=1024,
                         events=10,
                         merged=False)
        testFileD = File(lfn="/this/is/a/lfnD",
                         size=1024,
                         events=10,
                         merged=False)
        testFileE = File(lfn="/this/is/a/lfnE",
                         size=1024,
                         events=10,
                         merged=True)
        testFileF = File(lfn="/this/is/a/lfnF",
                         size=1024,
                         events=10,
                         merged=True)
        testFileA.create()
        testFileB.create()
        testFileC.create()
        testFileD.create()
        testFileE.create()
        testFileF.create()

        testFileE.addChild(testFileC["lfn"])
        testFileF.addChild(testFileD["lfn"])

        testJobA = Job(name="TestJob", files=[testFileA, testFileB])
        testJobA["couch_record"] = "somecouchrecord"
        testJobA["location"] = "test.site.ch"
        testJobA.create(group=testJobGroup)
        testJobA.associateFiles()

        testJobB = Job(name="TestJobB", files=[testFileC, testFileD])
        testJobB["couch_record"] = "somecouchrecord"
        testJobB["location"] = "test.site.ch"
        testJobB.create(group=testJobGroup)
        testJobB.associateFiles()

        goldenLFNs = ["/this/is/a/lfnA", "/this/is/a/lfnB"]

        parentLFNs = testJobA.getOutputDBSParentLFNs()
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                "ERROR: Unknown lfn: %s" % parentLFN
            goldenLFNs.remove(parentLFN)

        assert len(goldenLFNs) == 0, \
            "ERROR: LFNs are missing: %s" % goldenLFNs

        goldenLFNs = ["/this/is/a/lfnE", "/this/is/a/lfnF"]

        parentLFNs = testJobB.getOutputDBSParentLFNs()
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                "ERROR: Unknown lfn: %s" % parentLFN
            goldenLFNs.remove(parentLFN)

        assert len(goldenLFNs) == 0, \
            "ERROR: LFNs are missing..."

        return
Example #24
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = 's1', seName = "somese.cern.ch")
        locationAction.execute(siteName = 's2', seName = "otherse.cern.ch")

        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            newFile.addParent(lfn = parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100,
                       locations = set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()


        self.multipleSiteFileset = Fileset(name = "TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation("somese.cern.ch")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation(["somese.cern.ch","otherse.cern.ch"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "EventBased",
                                                   type = "Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "EventBased",
                                                     type = "Processing")
        self.multipleSiteSubscription.create()
        return
Example #25
    def testCompleteJobInput(self):
        """
        _testCompleteJobInput_

        Verify the correct output of the CompleteInput DAO.  This should mark
        the input for a job as complete once all the jobs that run over a
        particular file have completed successfully.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="setest.site.ch")
        testFileB = File(lfn=makeUUID(), locations="setest.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        bogusFileset.addFile([testFileA, testFileB])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])
        bogusSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA", files=[testFileA])
        testJobB = Job(name="TestJobB", files=[testFileA])
        testJobC = Job(name="TestJobC", files=[testFileB])
        bogusJob = Job(name="BogusJob", files=[testFileA, testFileB])
        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)
        testJobC.create(group=testJobGroup)
        bogusJob.create(group=bogusJobGroup)

        testJobA["outcome"] = "success"
        testJobB["outcome"] = "failure"
        testJobC["outcome"] = "success"
        testJobA.save()
        testJobB.save()
        testJobC.save()

        testJobA.completeInputFiles()

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        assert compFiles == 0, \
               "Error: test sub has wrong number of complete files: %s" % compFiles

        testJobB["outcome"] = "success"
        testJobB.save()

        testJobB.completeInputFiles()

        availFiles = len(testSubscription.filesOfStatus("Available"))
        assert availFiles == 0, \
               "Error: test sub has wrong number of available files: %s" % availFiles

        acqFiles = len(testSubscription.filesOfStatus("Acquired"))
        assert acqFiles == 1, \
               "Error: test sub has wrong number of acquired files: %s" % acqFiles

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        assert compFiles == 1, \
               "Error: test sub has wrong number of complete files: %s" % compFiles

        failFiles = len(testSubscription.filesOfStatus("Failed"))
        assert failFiles == 0, \
               "Error: test sub has wrong number of failed files: %s" % failFiles

        availFiles = len(bogusSubscription.filesOfStatus("Available"))
        assert availFiles == 0, \
               "Error: bogus sub has wrong number of available files: %s" % availFiles

        acqFiles = len(bogusSubscription.filesOfStatus("Acquired"))
        assert acqFiles == 2, \
               "Error: bogus sub has wrong number of acquired files: %s" % acqFiles

        compFiles = len(bogusSubscription.filesOfStatus("Completed"))
        assert compFiles == 0, \
               "Error: bogus sub has wrong number of complete files: %s" % compFiles

        failFiles = len(bogusSubscription.filesOfStatus("Failed"))
        assert failFiles == 0, \
               "Error: bogus sub has wrong number of failed files: %s" % failFiles

        return
Example #26
    def testB_NoFileSplitNoHardLimit(self):
        """
        _testB_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries; check different combinations of
        files to make sure we make the most of the splitting, e.g. include many
        zero-event files in a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumis per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7, twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # First test: the optimal setting is 360 events per job.
        # As the files have 0 events per lumi, the splitter will put all of
        # the files into a single job (once the fileset is closed)
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 0, "There aren't enough events, so it should have 0 job groups")

        # Close the fileset so the splitter will flush the remaining files into a job
        fileset = testSubscription.getFileset()
        fileset.markOpen(False)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100, "All 100 files must be in the job")
        self.assertEqual(jobs[0]['estimatedMemoryUsage'], 2300)
        self.assertEqual(jobs[0]['estimatedDiskUsage'], 0)
        self.assertEqual(jobs[0]['estimatedJobTime'], 0)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileset.create()
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "T2_CH_CERN")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "T2_CH_CERN")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "T2_CH_CERN")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "T2_CH_CERN")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "T2_CH_CERN")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "T2_CH_CERN")
        testFileG = self.createFile("/this/is/file7", 151, 6, 3, "T2_CH_CERN")
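        # Assuming the helper signature createFile(lfn, events, firstRun,
        # lumisPerFile, location), the events per lumi work out to:
        #   file1: 250/5 = 50    file2: 600/1 = 600   file3: 1200/2 = 600
        #   file4: 100/1 = 100   file5: 30/1 = 30     file6: 10/1 = 10
        #   file7: 151/3 ~= 50
        # which drives the per-job masks asserted below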
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Optimal settings are: jobs with 150 events per job
        # This means the first file must be split into 3 lumis per job, which would
        # leave room for another lumi in the second job, but the second file has a
        # lumi too big for that.  The third job only contains the second file; the
        # fourth and fifth jobs split the third file.  The sixth job gathers the
        # three small files (100 + 30 + 10 = 140 events) and the seventh takes the
        # last file on its own.
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7, "7 jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0: [[0, 2]]}, "Wrong mask for the first job")
        self.assertEqual(jobs[0]["estimatedJobTime"], 150 * 12)
        self.assertEqual(jobs[0]["estimatedDiskUsage"], 150 * 400)
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0: [[3, 4]]}, "Wrong mask for the second job")
        self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1: [[1, 1]]}, "Wrong mask for the third job")
        self.assertEqual(jobs[2]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[2]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2: [[4, 4]]}, "Wrong mask for the fourth job")
        self.assertEqual(jobs[3]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[3]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2: [[5, 5]]}, "Wrong mask for the fifth job")
        self.assertEqual(jobs[4]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[4]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(),
                         {3: [[3, 3]], 4: [[4, 4]], 5: [[5, 5]]}, "Wrong mask for the sixth job")
        self.assertEqual(jobs[5]["estimatedJobTime"], 140 * 12)
        self.assertEqual(jobs[5]["estimatedDiskUsage"], 140 * 400)
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6: [[18, 20]]}, "Wrong mask for the seventh job")
        self.assertEqual(jobs[6]["estimatedJobTime"], 150 * 12)
        self.assertEqual(jobs[6]["estimatedDiskUsage"], 150 * 400)

        for job in jobs:
            self.assertEqual(job["estimatedMemoryUsage"], 2300)
        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files: one with 3 runs, and a second containing only the
        # last run of the first
        fileA = File(lfn="/this/is/file1a", size=1000,
                     events=2400)
        # Eight lumis (1-8) shared by each of the three runs
        lumiList = []
        for lumi in range(8):
            lumiList.append(1 + lumi)
        fileA.addRun(Run(1, *lumiList))
        fileA.addRun(Run(2, *lumiList))
        fileA.addRun(Run(3, *lumiList))
        fileA.setLocation("T1_US_FNAL_Disk")

        fileB = self.createFile('/this/is/file2a', 200, 3, 5, "T1_US_FNAL_Disk")

        testFileset = Fileset(name='FilesetB')
        testFileset.create()
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
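        # With splitOnRun=True lumis from different runs never share a job:
        # fileA carries 2400 events over 3 runs of 8 lumis (100 events/lumi),
        # so runs 1 and 2 each split as 700 + 100, while run 3 combines
        # fileA's 800 events with fileB's 200 for a 700 + 300 split, giving
        # the six jobs asserted below.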
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[0]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[2]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[2]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[3]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[3]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[4]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[4]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[5]["estimatedJobTime"], 300 * 12)
        self.assertEqual(jobs[5]["estimatedDiskUsage"], 300 * 400)
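
The estimated-resource assertions in this example follow a simple linear model: job time and disk usage scale with the number of events, while memory is flat. A minimal sketch of the performance parameters the fixture presumably defines in setUp (the dict keys are an assumption inferred from the assertions, not shown in this excerpt):

    # Hypothetical setUp fragment matching the assertions above:
    # 12 s of CPU and 400 bytes of disk per event, flat 2300 MB of memory.
    self.performanceParams = {'timePerEvent': 12,
                              'sizePerEvent': 400,
                              'memoryRequirement': 2300}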
Example #27
    def createTestJobGroup(self,
                           nJobs=10,
                           retry_count=1,
                           workloadPath='test',
                           fwjrPath=None,
                           workloadName=None):
        """
        Creates a group of several jobs
        """
        # makeUUID() as a default argument would be evaluated just once, at
        # function definition time; generate a fresh workload name per call.
        if workloadName is None:
            workloadName = makeUUID()

        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath,
                                owner="cmsdataops",
                                group="cmsdataops",
                                name=workloadName,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent", size=1024, events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('malpaquet')

        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         first_event=88)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB",
                         size=1024,
                         events=10,
                         first_event=88)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('malpaquet')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent")
        testFileB.addParent(lfn="/this/is/a/parent")

        for _ in range(nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['mask']['FirstEvent'] = 100
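            # The mask pins each job to run 10: lumi 12312 plus the
            # 12314-12316 range, matching lumis present in the input files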
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup
Example #28
    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task and
        a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create jobs
        for each subscription in various states.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        dummyLocationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='site1', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN,
                           group_name='DEFAULT',
                           role_name='DEFAULT')

        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1",
                                    owner="Steve",
                                    name="Main",
                                    task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1",
                                         owner="Steve",
                                         name="Main",
                                         task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1",
                                       owner="Steve",
                                       name="Main",
                                       task="Cleanup")
        mainCleanupWorkflow.create()

        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [
            self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
            self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
            self.cleanupJobC
        ]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2",
                                 owner="Steve",
                                 name="Bogus",
                                 task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return
Example #29
    def makeNJobs(self,
                  name,
                  task,
                  nJobs,
                  jobGroup,
                  fileset,
                  sub,
                  site=None,
                  bl=None,
                  wl=None):
        """
        _makeNJobs_

        Make and return a WMBS Job and File.
        This handles all of the necessary add-ons.

        """
        # Avoid the mutable-default-argument pitfall
        bl = bl or []
        wl = wl or []

        # Set the CacheDir
        cacheDir = os.path.join(self.testDir, 'CacheDir')

        for n in range(nJobs):
            # First make a file
            testFile = File(lfn="/singleLfn/%s/%s" % (name, n),
                            size=1024,
                            events=10)
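            # Register the file either at the requested site or, failing
            # that, at every configured test site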
            if site:
                testFile.setLocation(site)
            else:
                for tmpSite in self.sites:
                    testFile.setLocation('se.%s' % (tmpSite))
            testFile.create()
            fileset.addFile(testFile)

        fileset.commit()

        for index, f in enumerate(fileset.files, start=1):
            testJob = Job(name='%s-%i' % (name, index))
            testJob.addFile(f)
            testJob["location"] = f.getLocations()[0]
            testJob['custom']['location'] = f.getLocations()[0]
            testJob['task'] = task.getPathName()
            testJob['sandbox'] = task.data.input.sandbox
            testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl')
            testJob['mask']['FirstEvent'] = 101
            testJob['owner'] = 'tapas'
            testJob["siteBlacklist"] = bl
            testJob["siteWhitelist"] = wl
            testJob['ownerDN'] = 'tapas'
            testJob['ownerRole'] = 'cmsrole'
            testJob['ownerGroup'] = 'phgroup'

            jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub),
                                    'Job_%i' % (index))
            os.makedirs(jobCache)
            testJob.create(jobGroup)
            testJob['cache_dir'] = jobCache
            testJob.save()
            jobGroup.add(testJob)
            # job.pkl must be written in binary mode for pickle
            with open(os.path.join(jobCache, 'job.pkl'), 'wb') as output:
                pickle.dump(testJob, output)
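        # Note: only the last job and file created above are returned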

        return testJob, testFile
Example #30
    def stuffWMBS(self, injected=True):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups, each
        containing several files, are injected, along with one incomplete job
        group.  Files are added both to the merge subscription's fileset and
        to the output fileset of their job groups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.bogusSubscription.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow",
                                 spec="input",
                                 owner="Steve",
                                 task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name="bogusInputWorkflow",
                                      spec="input",
                                      owner="Steve",
                                      task="Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset=inputFileset,
                                              workflow=bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn="parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription=bogusInputSubscription)
        jobGroup3.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob1A = Job()
        testJob1A.addFile(parentFile1)
        testJob1A.create(jobGroup3)
        testJob1A["state"] = "cleanout"
        testJob1A["oldstate"] = "new"
        testJob1A["couch_record"] = "somejive"
        testJob1A["retry_count"] = 0
        testJob1A["outcome"] = "failure"
        testJob1A.save()
        changeStateDAO.execute([testJob1A])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split-by-event job that the merger should
        # ignore: parentFile5 gets one successful job and one failed job, so
        # its output (badFile1) must never become merge-ready.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        testJob7 = Job()
        testJob7.addFile(self.parentFileSite2)
        testJob7.create(jobGroup2)
        testJob7["state"] = "cleanout"
        testJob7["oldstate"] = "new"
        testJob7["couch_record"] = "somejive"
        testJob7["retry_count"] = 0
        testJob7["outcome"] = "success"
        testJob7.save()
        changeStateDAO.execute([testJob7])

        badFile1 = File(lfn="badFile1",
                        size=10241024,
                        events=10241024,
                        first_event=0,
                        locations=set(["T2_CH_CERN"]))
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations=set(["T2_CH_CERN"]))
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations=set(["T2_CH_CERN"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations=set(["T2_CH_CERN"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV",
                      size=1024,
                      events=1024,
                      first_event=3072,
                      locations=set(["T2_CH_CERN"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])
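        # fileX/fileY/fileZ ("badFileA/B/C") descend from parentFile4, whose
        # job failed, so the merge algorithm must not treat them as ready.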

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for outputFile in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV, fileX, fileY, fileZ, badFile1
        ]:
            self.mergeFileset.addFile(outputFile)
            self.bogusFileset.addFile(outputFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return