Example #1
    def testCreateDeleteExistsNoFiles(self):
        """
        _testCreateDeleteExistsNoFiles_

        Create and then delete a job but don't add any input files to it.
        Use the job class's exists() method to determine if the job has been
        written to the database before it is created, after it has been created
        and after it has been deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job(name="TestJob")

        assert testJob.exists() == False, "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() > 0, "ERROR: Job does not exist after it was created"

        testJob.delete()

        assert testJob.exists() == False, "ERROR: Job exists after it was deleted"

        return
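These assertions lean on the WMBS convention that exists() returns the object's database id when the row is present and False otherwise (Example #7 relies on this directly with Job(id=testJob.exists())), which is why "> 0" rather than "== True" is the meaningful success check. A minimal illustration of the convention:

        jobId = testJob.exists()
        # jobId is the job's database id (> 0) when the row exists,
        # and False when it does not
        if jobId is not False:
            print("Job written to the database with id %d" % jobId)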
Example #2
    def test_AutoIncrementCheck(self):
        """
        _AutoIncrementCheck_

        Test and see whether we can find and set the auto_increment values
        """
        myThread = threading.currentThread()
        if not myThread.dialect.lower() == "mysql":
            return

        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 1)

        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 2)

        incrementDAO.execute(input=10)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 11)

        incrementDAO.execute(input=5)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 12)

        return
Example #3
    def testMultiRunHarvesting(self):
        """
        _testMultiRunHarvesting_

        Given a fileset with a couple of files spanning different runs, create a
        single job covering all the runs at a specific location. The job also
        carries a multiRun baggage flag (True) that SetupCMSSWPSet later looks up.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(dqmHarvestUnit="multiRun")
        self.assertEqual(len(jobGroups), 1)

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 1)
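The harvesting examples here and below call a createCommonFileset() helper that is not reproduced in this collection. A minimal sketch of what such a helper could look like, assuming a single location and files spanning runs 1 through 6 (assumptions chosen to be consistent with the job counts asserted in Examples #9 and #20; the real helper may differ):

def createCommonFileset():
    """
    Hypothetical sketch: build an open fileset whose files span runs
    1 through 6 at a single location.
    """
    fileset = Fileset(name="TestFileset")
    fileset.create()
    for run in range(1, 7):
        newFile = File(lfn="/store/harvest/file_run%d" % run, size=1000,
                       events=100, locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(run, *[1, 2]))
        newFile.create()
        fileset.addFile(newFile)
    fileset.commit()
    return fileset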
Example #4
    def testA(self):
        """
        _testA_

        Handle AddWorkflowToManage events
        """
        myThread = threading.currentThread()
        config = self.getConfig()

        testWorkflowManager = WorkflowManager(config)
        testWorkflowManager.prepareToStart()

        for i in xrange(0, WorkflowManagerTest._maxMessage):

            workflow = Workflow(spec = "testSpec.xml", owner = "riahi",
                                name = "testWorkflow" + str(i), task = "testTask")
            workflow.create()

            for j in xrange(0, 3):
                workflowManagerdict = {'payload': {'WorkflowId': workflow.id,
                                                   'FilesetMatch': 'FILESET_' + str(j),
                                                   'SplitAlgo': 'NO SPLITALGO',
                                                   'Type': 'NO TYPE'}}
                testWorkflowManager.handleMessage(type = 'AddWorkflowToManage',
                                                  payload = workflowManagerdict)

        time.sleep(30)

        myThread.workerThreadManager.terminateWorkers()

        while threading.activeCount() > 1:
            print('Currently: ' + str(threading.activeCount()) +
                  ' Threads. Wait until all our threads have finished')
            time.sleep(1)
Example #5
    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_
        
        Creates a large group of files for testing
        """
        testFileset = Fileset(name = "TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()
            
        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman",
                                name = "wf003", task="Test" )
        testWorkflow.create()

        largeSubscription = Subscription(fileset = testFileset,
                                         workflow = testWorkflow,
                                         split_algo = "FileBased",
                                         type = "Processing")
        largeSubscription.create()

        return largeSubscription
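For illustration, this helper could drive a FileBased splitting test along the lines below. This is a hypothetical test method, not part of the original suite; the SplitterFactory usage and files_per_job parameter follow the patterns in Examples #8 and #21:

    def testLargeFileBlockSplitting(self):
        """
        Hypothetical usage sketch for createLargeFileBlock().
        """
        largeSubscription = self.createLargeFileBlock()
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = largeSubscription)
        jobGroups = jobFactory(files_per_job = 100)
        # 5000 files at 100 per job should yield 50 jobs at one location
        self.assertEqual(len(jobGroups[0].jobs), 50)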
Example #6
    def setUp(self):
        """
        _setUp_

        Create a single subscription with one file.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        
        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", seName = "somese.cern.ch")
        locationAction.execute(siteName = "site2", seName = "otherse.cern.ch")
        
        self.testFileset = Fileset(name = "TestFileset1")
        self.testFileset.create()
        
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test" )
        testWorkflow.create()
        self.testSubscription = Subscription(fileset = self.testFileset,
                                             workflow = testWorkflow,
                                             split_algo = "Periodic",
                                             type = "Processing")
        self.testSubscription.create()
        return
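A setUp like this is normally paired with a tearDown that drops the schema again; a minimal sketch assuming the same TestInit helper:

    def tearDown(self):
        """
        _tearDown_

        Clear out the WMBS schema created in setUp.
        """
        self.testInit.clearDatabase()
        return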
Example #7
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
Example #8
    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where files exist only in the delete subscription,
        which can happen if cleanup of the other subscriptions is fast.

        """
        testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test")
        testWorkflowA.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size=1000, events=100, locations=set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(
            fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing"
        )
        testSubscriptionA.create()

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA)

        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.")

        return
Example #9
    def testByRunAndRunBlacklist(self):
        """
        _testByRunAndRunBlacklist_

        Create harvesting jobs by run for the whitelisted runs that are not
        also blacklisted.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro",
                                      name="TestWorkflow", task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow,
                                  split_algo="Harvest", type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 3)
Example #10
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testJobA = Job(name = makeUUID(), files = [])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name = makeUUID(), files = [])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group = testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name = makeUUID(), files = [])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroup)        
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."                

        return
Example #11
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = StoreResultsWorkloadFactory.getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

        factory = StoreResultsWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                           arguments)

        testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        return
Example #12
    def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                           lastEvent = 100, firstLumi = 1, lastLumi = 10,
                           index = 1):
        # MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCTestFileset %i" % index)
        singleMCFileset.create()
        newFile = File("MCFakeFileTest %i" % index, size = 1000,
                       events = numEvents,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        newFile.create()
        singleMCFileset.addFile(newFile)
        singleMCFileset.commit()
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()

        singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                                workflow = testWorkflow,
                                                split_algo = "EventBased",
                                                type = "Production")
        singleMCFileSubscription.create()
        return singleMCFileSubscription
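A hypothetical way to exercise this helper with the EventBased splitter it configures (values are illustrative and not asserted by the original suite):

    def testEventBasedMCSplitting(self):
        """
        Hypothetical usage sketch for generateFakeMCFile().
        """
        singleMCFileSubscription = self.generateFakeMCFile(numEvents = 1000,
                                                           lastEvent = 1000)
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = singleMCFileSubscription)
        jobGroups = jobFactory(events_per_job = 250)
        # 1000 events at 250 per job should give four production jobs
        self.assertEqual(len(jobGroups[0].jobs), 4)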
Example #13
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Create a JobGroup inside a transaction, then roll back the transaction
        and verify that the JobGroup is no longer in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        assert testJobGroup.exists() == False, "ERROR: Job group exists before it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJobGroup.create()

        assert testJobGroup.exists() > 0, "ERROR: Job group does not exist after it was created"

        myThread.transaction.rollback()

        assert testJobGroup.exists() == False, "ERROR: Job group exists after transaction was rolled back."

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
Example #14
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Create a JobGroup and then delete it.  Use the JobGroup's exists()
        method to determine if it exists before it is created, after it is
        created and after it is deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        self.assertFalse(testJobGroup.exists())

        testJobGroup.create()

        self.assertTrue(testJobGroup.exists())

        testJobGroup.delete()

        self.assertFalse(testJobGroup.exists())

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
Example #15
    def testCountWorkflow(self):
        """
        _testCountWorkflow_

        Test the Workflow.CountWorkflowBySpec DAO by creating ten workflows
        that share a spec and deleting them one at a time.
        """
        spec = "spec.py"
        owner = "moron"
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger,
                                dbinterface=myThread.dbi)
        countSpecDAO = daoFactory(classname="Workflow.CountWorkflowBySpec")

        workflows = []
        for i in range(0, 10):
            testWorkflow = Workflow(spec=spec, owner=owner,
                                    name="wf00%i" % i, task="task%i" % i)
            testWorkflow.create()
            workflows.append(testWorkflow)

        self.assertEqual(countSpecDAO.execute(spec=spec), 10)

        for i in range(0, 10):
            wf = workflows.pop()
            wf.delete()
            self.assertEqual(countSpecDAO.execute(spec=spec), 10 - (i + 1))

        return
Example #16
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
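A hypothetical follow-up that loads the job back by id, mirroring the Job(id=...) / loadData() pattern from Example #7:

        testJob = createTestJob()
        loadedJob = Job(id=testJob["id"])
        loadedJob.loadData()
        assert loadedJob["couch_record"] == "somecouchrecord", \
            "ERROR: Loaded job lost its couch record"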
Example #17
    def testWorkflowInjectMarking(self):
        """
        _testWorkflowInjectMarking_

        Test whether or not we can mark a workflow as injected or not.
        """
        owner = "moron"

        workflows = []
        for i in range(0, 10):
            testWorkflow = Workflow(spec="sp00%i" % i, owner=owner,
                                    name="wf00%i" % i, task="task%i" % i)
            testWorkflow.create()
            workflows.append(testWorkflow)

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger,
                                dbinterface=myThread.dbi)
        getAction = daoFactory(classname="Workflow.GetInjectedWorkflows")
        markAction = daoFactory(classname="Workflow.MarkInjectedWorkflows")

        result = getAction.execute(injected=True)
        self.assertEqual(len(result), 0)
        result = getAction.execute(injected=False)
        self.assertEqual(len(result), 10)

        names = ['wf002', 'wf004', 'wf006', 'wf008']
        markAction.execute(names=names, injected=True)

        result = getAction.execute(injected=True)
        self.assertEqual(result, names)
        result = getAction.execute(injected=False)
        self.assertEqual(len(result), 6)
        return
Example #18
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Test the create(), delete() and exists() methods of the workflow class
        by creating and deleting a workflow.  The exists() method will be
        called before and after creation and after deletion.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task='Test', wfType="ReReco")

        self.assertEqual(testWorkflow.exists(), False,
                         "ERROR: Workflow exists before it was created")

        testWorkflow.create()

        self.assertTrue(testWorkflow.exists() > 0,
                        "ERROR: Workflow does not exist after it has been created")

        testWorkflow.create()
        testWorkflow.delete()

        self.assertEqual(testWorkflow.exists(), False,
                         "ERROR: Workflow exists after it has been deleted")
        return
Example #19
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_
        
        Create a set of test subscriptions for testing purposes.
        """

        if nSites > self.nSites:
            nSites = self.nSites

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        # Create a testWorkflow
        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
        testWorkflow.create()

        # Create the files for each site
        for s in range(nSites):
            for i in range(nFiles):
                newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s]))
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing"
        )
        testSubscription.create()

        # Close the fileset
        if closeFileset:
            testFileset.markOpen(isOpen=False)

        return testSubscription
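For illustration, a hypothetical MinFileBased splitting test built on this helper (counts are assumptions, not taken from the original suite):

    def testMinFileBasedSplitting(self):
        """
        Hypothetical usage sketch for createTestSubscription().
        """
        testSubscription = self.createTestSubscription(nFiles=10, nSites=2,
                                                       closeFileset=True)
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=testSubscription)
        jobGroups = jobFactory(files_per_job=5)
        # 10 files per site at 5 files per job should give 2 jobs per site
        self.assertEqual(sum(len(group.jobs) for group in jobGroups), 4)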
Example #20
    def testByRunHarvesting(self):
        """
        _testByRunHarvesting_

        Given a fileset with a couple of files spanning several runs, create
        one job per run and location. The multiRun baggage flag should be
        False in this case.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True, "Fileset should be open!")

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory()
        self.assertEqual(len(jobGroups), 1, "Should have created 1 job group")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs")
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")
Example #21
    def testDifferentSubscriptionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Make sure that the merge splitting still runs if the subscription ID
        is not equal to the workflow ID.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        dummyWorkflow = Workflow(name = "dummyWorkflow", spec = "bunk49",
                                 owner = "Steve", task="Test2")
        dummyWorkflow.create()
        dummyFileset = Fileset(name = "dummyFileset")
        dummyFileset.create()
        dummySubscription1 = Subscription(fileset = dummyFileset,
                                          workflow = dummyWorkflow,
                                          split_algo = "ParentlessMergeBySize")
        dummySubscription2 = Subscription(fileset = dummyFileset,
                                          workflow = dummyWorkflow,
                                          split_algo = "ParentlessMergeBySize")
        dummySubscription1.create()
        dummySubscription2.create()
        myThread.transaction.commit()

        self.stuffWMBS()
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)
        result = jobFactory(min_merge_size = 4097, max_merge_size = 99999999,
                            max_merge_events = 999999999, merge_across_runs = False)
        self.assertEqual(len(result), 1)
        jobGroup = result[0]
        self.assertEqual(len(jobGroup.jobs), 2)
        return
Example #22
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """
        baseName = makeUUID()

        testWorkflow = Workflow(spec="spec.xml", owner="dmwm",
                                name="testWorkflow_%s" % baseName[:4], task="Test")
        testWorkflow.create()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T1_US_FNAL_Disk')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'T2_CH_CERN')
                testFileset.addFile(newFile)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
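This helper delegates to a createFile() method that is not shown. A minimal sketch of what it might do, assuming one run per file with lumisPerFile consecutive lumi sections at the given location (the real helper may number lumis differently):

    def createFile(self, lfn, events, run, lumis, location):
        """
        Hypothetical sketch: one file carrying `lumis` consecutive lumi
        sections of a single run, placed at `location`.
        """
        newFile = File(lfn=lfn, size=1000, events=events)
        firstLumi = run * lumis
        newFile.addRun(Run(run, *range(firstLumi, firstLumi + lumis)))
        newFile.setLocation(location)
        return newFile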
Example #23
    def createWorkflow(self, task):
        """
        Register each task in WMBS by creating its Workflow and input Fileset.

        """

        specURL = self.getWorkflowURL(task)

        fileSet = Fileset(name=self.getFilesetName(task), is_open=True)
        fileSet.create()

        taskFlow = Workflow(spec=specURL, owner=self.owner, dn=self.owner_dn,
                            name=self.getWorkflowName(task), task=task.name())
        taskFlow.create()

        self.workflowDict[task.name()] = taskFlow

        # Insert workflow into task
        setattr(task.data.input.WMBS, 'WorkflowSpecURL', specURL)

        # If the job is a merge job
        # Find the task it merges from
        # Then find the workflow for that task and assign it an output
        if hasattr(task.inputReference(), 'outputModule'):
            dummyStepName = task.inputReference().inputStep.split('/')[-1]
            taskName = task.inputReference().inputStep.split('/')[-2]
            outputModule = task.inputReference().outputModule
            if taskName not in self.workflowDict:
                raise Exception('I am being asked to chain output for a task %s which does not yet exist' % taskName)
            outputWorkflow = self.workflowDict[taskName]
            outputWorkflow.addOutput(outputModule, fileSet)

        logging.info('Registered workflow for step %s', task.name())

        return taskFlow, fileSet
Example #24
    def testDeleteTransaction(self):
        """
        _testDeleteTransaction_

        Create a workflow and commit it to the database.  Begin a transaction
        and delete the workflow, then rollback the transaction.  Use the
        workflow's exists() method to verify that the workflow doesn't exist
        in the database before create() is called, it does exist after create()
        is called, it doesn't exist after delete() is called and it does exist
        after the transaction is rolled back.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")

        self.assertEqual(testWorkflow.exists(), False, "ERROR: Workflow exists before it was created")

        testWorkflow.create()

        self.assertTrue(testWorkflow.exists() > 0, "ERROR: Workflow does not exist after it has been created")

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testWorkflow.delete()

        self.assertEqual(testWorkflow.exists(), False, "ERROR: Workflow exists after it has been deleted")

        myThread.transaction.rollback()

        self.assertTrue(testWorkflow.exists() > 0, "ERROR: Workflow does not exist transaction was rolled back")

        return
Example #25
    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow,
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task='/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return
Example #26
    def testParentageByJob(self):
        """
        _testParentageByJob_
        
        Tests the DAO that assigns parentage by Job
        """

        testWorkflow = Workflow(spec = 'hello', owner = "mnorman",
                                name = "wf001", task="basicWorkload/Production")
        testWorkflow.create()
        testFileset = Fileset(name = "TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow,
                                        type = "Processing", split_algo = "FileBased")
        testSubscription.create()
        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024,
                               events = 20, checksums = {'cksum': 1})
        testFileParentA.addRun(Run(1, *[45]))
        testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024,
                               events = 20, checksums = {'cksum': 1})
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                         checksums = {'cksum': 1})
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()

        testJobA = Job()
        testJobA.create(group = testJobGroup)
        testJobA.addFile(testFileParentA)
        testJobA.addFile(testFileParentB)
        testJobA.associateFiles()

        parentAction = self.daofactory(classname = "Files.SetParentageByJob")
        parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']})

        testFileB = File(id = testFileA["id"])
        testFileB.loadData(parentage = 1)

        goldenFiles = [testFileParentA, testFileParentB]
        for parentFile in testFileB["parents"]:
            self.assertEqual(parentFile in goldenFiles, True,
                   "ERROR: Unknown parent file")
            goldenFiles.remove(parentFile)

        self.assertEqual(len(goldenFiles), 0,
                         "ERROR: Some parents are missing")
Example #27
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None, wfPrio=1, changeState=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        changeState is an instance of the ChangeState class to make job status changes
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=wfPrio)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        if changeState:
            for group in jobGroupList:
                changeState.propagate(group.jobs, 'created', 'new')

        return jobGroupList
Example #28
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None):
        """
        Creates a series of jobGroups for submissions

        """
        bl = bl or []
        wl = wl or []

        jobGroupList = []

        testWorkflow = Workflow(
            spec=workloadSpec,
            owner="tapas",
            name=makeUUID(),
            task="basicWorkload/Production",
            owner_vogroup="phgroup",
            owner_vorole="cmsrole",
        )
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #29
    def testDeleteTransaction(self):
        """
        _testDeleteTransaction_

        Create a new job and commit it to the database.  Start a new transaction
        and delete the job from the database.  Verify that the job has been
        deleted.  After that, roll back the transaction and verify that the
        job is once again in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])

        assert testJob.exists() is False, \
            "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() > 0, \
            "ERROR: Job does not exist after it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJob.delete()

        assert testJob.exists() is False, \
            "ERROR: Job exists after it was delete"

        myThread.transaction.rollback()

        assert testJob.exists() > 0, \
            "ERROR: Job does not exist after transaction was rolled back."

        return
Example #30
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Stuff WMBS with workflows
        """
        workflow = Workflow(spec = 'spec.xml', name = 'ReRecoTest_v0Emulator',
                            task = '/ReRecoTest_v0Emulator/Test', priority = 10)
        workflow.create()
        inputFileset = Fileset(name = 'TestFileset')
        inputFileset.create()
        subscription = Subscription(inputFileset, workflow)
        subscription.create()
Example #31
    def testDependentReDigi(self):
        """
        _testDependentReDigi_

        Verify that a dependent ReDigi workflow that keeps and stages out
        RAW data is created and installed into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
        stepOneUnmergedRAWFileset.loadData()
        stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
        stepOneMergedRAWFileset.loadData()
        stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepOneLogArchiveFileset.loadData()
        stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
        stepOneMergeLogArchiveFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
        stepThreeUnmergedAODFileset.loadData()
        stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
        stepThreeMergedAODFileset.loadData()
        stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
        stepThreeLogArchiveFileset.loadData()

        stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
        stepThreeMergeLogArchiveFileset.loadData()

        stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepOneWorkflow.load()
        self.assertEqual(stepOneWorkflow.wfType, 'redigi')
        self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")

        for outputMod in stepOneWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
        stepOneSub.loadData()
        self.assertEqual(stepOneSub["type"], "Processing",
                         "Error: Step one sub has wrong type.")

        stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
        stepOneCleanupWorkflow.load()
        self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneCleanupSub.loadData()
        self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                         "Error: Step one sub has wrong type.")

        stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepOneLogCollectWorkflow.load()
        self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset)
        stepOneLogCollectSub.loadData()
        self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                         "Error: Step one sub has wrong type.")

        stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
        stepOneMergeWorkflow.load()
        self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        for outputMod in stepOneMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneMergeSub.loadData()
        self.assertEqual(stepOneMergeSub["type"], "Merge",
                         "Error: Step one sub has wrong type.")

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                     task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
        stepThreeWorkflow.load()
        self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
        stepThreeSub.loadData()
        self.assertEqual(stepThreeSub["type"], "Processing",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
        stepThreeCleanupWorkflow.load()
        self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeCleanupSub.loadData()
        self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                         "Error: Step three sub has wrong type.")

        stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
        stepThreeLogCollectWorkflow.load()
        self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset)
        stepThreeLogCollectSub.loadData()
        self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                         "Error: Step three sub has wrong type.")

        stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
        stepThreeMergeWorkflow.load()
        self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeMergeSub.loadData()
        self.assertEqual(stepThreeMergeSub["type"], "Merge",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        return
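A note on the verification pattern above: every task is checked the same way
(load the Workflow, compare each output module's filesets, then check the type
of the Subscription feeding it). A data-driven helper capturing that pattern
could look like the sketch below. It is hypothetical and not part of the test
suite; it assumes only the WMCore.WMBS Workflow and Subscription classes
already used in these tests.

    def verifyTaskInWMBS(testCase, task, inputFileset, subType, expectedOutputs):
        """
        Load the workflow for the given task path, check that every output
        module maps to the expected (unmerged, merged) fileset ids, and check
        the type of the subscription feeding it from inputFileset.

        expectedOutputs maps an output module name to a tuple of
        (output fileset id, merged output fileset id); for logArchive the two
        ids are simply the same.
        """
        workflow = Workflow(spec = "somespec", name = "TestWorkload", task = task)
        workflow.load()
        for outputMod, (outputID, mergedID) in expectedOutputs.items():
            testCase.assertTrue(outputMod in workflow.outputMap.keys(),
                                "Error: %s missing output module %s." % (task, outputMod))
            # exactly one destination per output module
            testCase.assertEqual(len(workflow.outputMap[outputMod]), 1)
            testCase.assertEqual(workflow.outputMap[outputMod][0]["output_fileset"].id, outputID)
            testCase.assertEqual(workflow.outputMap[outputMod][0]["merged_output_fileset"].id, mergedID)
        sub = Subscription(workflow = workflow, fileset = inputFileset)
        sub.loadData()
        testCase.assertEqual(sub["type"], subType)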
Example #32
    def testGetOutputMapDAO(self):
        """
        _testGetOutputMapDAO_

        Verify the proper behavior of the GetOutputMapDAO for a variety of
        different processing chains.
        """
        recoOutputFileset = Fileset(name="RECO")
        recoOutputFileset.create()
        mergedRecoOutputFileset = Fileset(name="MergedRECO")
        mergedRecoOutputFileset.create()
        alcaOutputFileset = Fileset(name="ALCA")
        alcaOutputFileset.create()
        mergedAlcaOutputFileset = Fileset(name="MergedALCA")
        mergedAlcaOutputFileset.create()
        dqmOutputFileset = Fileset(name="DQM")
        dqmOutputFileset.create()
        mergedDqmOutputFileset = Fileset(name="MergedDQM")
        mergedDqmOutputFileset.create()
        cleanupFileset = Fileset(name="Cleanup")
        cleanupFileset.create()

        testWorkflow = Workflow(spec="wf001.xml",
                                owner="Steve",
                                name="TestWF",
                                task="None")
        testWorkflow.create()
        testWorkflow.addOutput("output", recoOutputFileset,
                               mergedRecoOutputFileset)
        testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset,
                               mergedAlcaOutputFileset)
        testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset)
        testWorkflow.addOutput("output", cleanupFileset)
        testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset)
        testWorkflow.addOutput("DQM", cleanupFileset)

        testRecoMergeWorkflow = Workflow(spec="wf002.xml",
                                         owner="Steve",
                                         name="TestRecoMergeWF",
                                         task="None")
        testRecoMergeWorkflow.create()
        testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset,
                                        mergedRecoOutputFileset)

        testRecoProcWorkflow = Workflow(spec="wf004.xml",
                                        owner="Steve",
                                        name="TestRecoProcWF",
                                        task="None")
        testRecoProcWorkflow.create()

        testAlcaChildWorkflow = Workflow(spec="wf003.xml",
                                         owner="Steve",
                                         name="TestAlcaChildWF",
                                         task="None")
        testAlcaChildWorkflow.create()

        inputFile = File(lfn="/path/to/some/lfn",
                         size=600000,
                         events=60000,
                         locations="cmssrm.fnal.gov")
        inputFile.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testFileset.addFile(inputFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventBased",
                                        type="Processing")

        testMergeRecoSubscription = Subscription(
            fileset=recoOutputFileset,
            workflow=testRecoMergeWorkflow,
            split_algo="WMBSMergeBySize",
            type="Merge")
        testProcRecoSubscription = Subscription(fileset=recoOutputFileset,
                                                workflow=testRecoProcWorkflow,
                                                split_algo="FileBased",
                                                type="Processing")

        testChildAlcaSubscription = Subscription(
            fileset=alcaOutputFileset,
            workflow=testAlcaChildWorkflow,
            split_algo="FileBased",
            type="Processing")
        testSubscription.create()
        testMergeRecoSubscription.create()
        testProcRecoSubscription.create()
        testChildAlcaSubscription.create()
        testSubscription.acquireFiles()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job(name="SplitJobA", files=[inputFile])
        testJob.create(group=testJobGroup)
        testJob["state"] = "complete"
        testJob.save()

        outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap")
        outputMap = outputMapAction.execute(jobID=testJob["id"])

        assert len(outputMap.keys()) == 3, \
               "Error: Wrong number of outputs for primary workflow."

        goldenMap = {
            "output": (recoOutputFileset.id, mergedRecoOutputFileset.id),
            "ALCARECOStreamCombined":
            (alcaOutputFileset.id, mergedAlcaOutputFileset.id),
            "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id)
        }

        for outputID in outputMap.keys():
            for outputFilesets in outputMap[outputID]:
                if outputFilesets["merged_output_fileset"] == None:
                    self.assertEqual(outputFilesets["output_fileset"],
                                     cleanupFileset.id,
                                     "Error: Cleanup fileset is wrong.")
                    continue

                self.assertTrue(outputID in goldenMap.keys(),
                                "Error: Output identifier is missing.")
                self.assertEqual(outputFilesets["output_fileset"],
                                 goldenMap[outputID][0],
                                 "Error: Output fileset is wrong.")
                self.assertEqual(outputFilesets["merged_output_fileset"],
                                 goldenMap[outputID][1],
                                 "Error: Merged output fileset is wrong.")
                del goldenMap[outputID]

        self.assertEqual(len(goldenMap.keys()), 0,
                         "Error: Missing output maps.")

        return
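For reference, the map returned by the Jobs.GetOutputMap DAO for this job has
the shape sketched below: each output module name points to a list of fileset
id pairs, and a None merged id marks the unmerged/cleanup destination. This is
an illustration built from the filesets created above, not captured DAO
output, and the order of entries within each list is not significant.

    exampleOutputMap = {
        "output": [
            {"output_fileset": recoOutputFileset.id,
             "merged_output_fileset": mergedRecoOutputFileset.id},
            {"output_fileset": cleanupFileset.id,
             "merged_output_fileset": None},
        ],
        "ALCARECOStreamCombined": [
            {"output_fileset": alcaOutputFileset.id,
             "merged_output_fileset": mergedAlcaOutputFileset.id},
            {"output_fileset": cleanupFileset.id,
             "merged_output_fileset": None},
        ],
        "DQM": [
            {"output_fileset": dqmOutputFileset.id,
             "merged_output_fileset": mergedDqmOutputFileset.id},
            {"output_fileset": cleanupFileset.id,
             "merged_output_fileset": None},
        ],
    }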
Example #33
    def verifyDiscardRAW(self):
        """
        _verifyDiscardRAW_

        Verify that a workflow that discards the RAW was installed into WMBS
        correctly.
        """
        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        return
Example #34
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'TIME': int(time.time()),
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2]:
            insertLumiDAO.execute(binds={
                'RUN': 1,
                'LUMI': lumi
            },
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Express",
                                          type="Express")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(
            classname="RunLumiCloseout.InsertClosedLumi")
        self.releaseExpressDAO = daoFactory(
            classname="Tier0Feeder.ReleaseExpress")
        self.currentTime = int(time.time())

        return
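With this fixture in place, an individual test would typically pull the
Express splitting algorithm out of the factory and run it over subscription1,
along the lines of the fragment below. The splitting parameter name and value
are illustrative assumptions, not taken from a real Tier-0 run configuration.

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)
        jobGroups = jobFactory(maxInputEvents=200)  # illustrative parameter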
Example #35
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", arguments)

        outputDatasets = testWorkload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 2)
        self.assertTrue(
            "/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in
            outputDatasets)
        self.assertTrue(
            "/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in
            outputDatasets)

        productionTask = testWorkload.getTaskByPath(
            '/TestWorkload/MonteCarloFromGEN')
        splitting = productionTask.jobSplittingParameters()
        self.assertFalse(splitting["deterministicPileup"])

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "MonteCarloFromGEN",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'production')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/MonteCarloFromGEN/unmerged-%s" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s"
                % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(
            name="TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(
            procSubscription["type"], "Production",
            "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedReco,
                                         workflow=recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlca,
                                         workflow=alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(
                name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #36
    def _commonMonteCarloTest(self):
        """
        Retrieve the workload from WMBS and test all its properties.
        """
        prodWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production")
        prodWorkflow.load()

        self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Production/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Production-SomeBlock")
        topLevelFileset.loadData()

        prodSubscription = Subscription(fileset = topLevelFileset, workflow = prodWorkflow)
        prodSubscription.loadData()

        self.assertEqual(prodSubscription["type"], "Production",
                         "Error: Wrong subscription type.")
        self.assertEqual(prodSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            unmergedOutput = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmergedOutput.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/Production/ProductionMerge%s" % outputName)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedOutput, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for outputName in ["OutputA", "OutputB"]:
            unmerged = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/Production/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Production/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        for outputName in ["OutputA", "OutputB"]:
            mergeLogCollect = Fileset(name = "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName)
            mergeLogCollect.loadData()
            mergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                              task = "/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (outputName, outputName))
            mergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = mergeLogCollect, workflow = mergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")
Example #37
    def pollSubscriptions(self):
        """
        Poller for looking in all active subscriptions for jobs that need to be made.

        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        myThread = threading.currentThread()

        # First, get list of Subscriptions
        subscriptions = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id=subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists, i.e.,
                # someone executed a kill() against the database while the
                # JobCreator was mid-cycle.
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            workflow = Workflow(id=wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload = retrieveWMSpec(workflow=workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem: we NEED a sandbox.  Abort this
                # subscription, but do NOT fail.  We have no way of marking a
                # subscription as bad per se, so we just have to keep skipping it.
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                self.sendAlert(1, msg=msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n",
                          subscriptionID, workflow.id)

            # retrieve information from the workload to propagate down to the job configuration
            allowOpport = wmWorkload.getAllowOpportunistic()

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)

            # Get generators
            # If you fail to load the generators, pass on the job
            try:
                if hasattr(wmTask.data, 'generators'):
                    manager = GeneratorManager(wmTask)
                    seederList = manager.getGeneratorList()
                else:
                    seederList = []
            except Exception as ex:
                msg = "Had failure loading generators for subscription %i\n" % (
                    subscriptionID)
                msg += "Exception: %s\n" % str(ex)
                msg += "Passing over this error.  It will reoccur next interation!\n"
                msg += "Please check or remove this subscription!\n"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                continue

            logging.debug(
                "Going to call wmbsJobFactory for sub %i with limit %i",
                subscriptionID, self.limit)

            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s", splitParams)

            # Load the proper job splitting module
            splitterFactory = SplitterFactory(
                splitParams.get('algo_package', "WMCore.JobSplitting"))
            # and return an instance of the splitting algorithm
            wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                             subscription=wmbsSubscription,
                                             generators=seederList,
                                             limit=self.limit)

            # Turn on the jobFactory --> get available files for that subscription, keep result proxies
            wmbsJobFactory.open()

            # Wrap the jobFactory in a generator: runSplitter calls the JobFactory's
            # __call__, which runs the algorithm method of the job splitting algo instance
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow=workflow.id,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database.",
                          jobNumber, workflow.name)

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter",
                                 len(wmbsJobGroups))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i",
                                 subscriptionID)
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i",
                                 subscriptionID)
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # Assemble a dict of all the info
                processDict = {
                    'workflow': workflow,
                    'wmWorkload': wmWorkload,
                    'wmTaskName': wmTask.getPathName(),
                    'jobNumber': jobNumber,
                    'sandbox': wmTask.data.input.sandbox,
                    'owner': wmWorkload.getOwner().get('name', None),
                    'ownerDN': wmWorkload.getOwner().get('dn', None),
                    'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                    'ownerRole': wmWorkload.getOwner().get('vorole', ''),
                    'numberOfCores': 1,
                    'inputDataset': wmTask.getInputDatasetPath()
                }
                try:
                    maxCores = 1
                    stepNames = wmTask.listAllStepNames()
                    for stepName in stepNames:
                        sh = wmTask.getStep(stepName)
                        maxCores = max(maxCores, sh.getNumberOfCores())
                    processDict.update({'numberOfCores': maxCores})
                except AttributeError:
                    logging.info(
                        "Failed to read multicore settings from task %s",
                        wmTask.getPathName())

                tempSubscription = Subscription(id=wmbsSubscription['id'])

                # if we have glideinWMS constraints, then adapt all jobs
                if self.glideinLimits:
                    capResourceEstimates(wmbsJobGroups, self.glideinLimits)

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion()
                    tempDict['scramArch'] = wmTask.getScramArch()
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber
                    tempDict['inputDatasetLocations'] = wmbsJobGroup.getLocationsForJobs()
                    tempDict['allowOpportunistic'] = allowOpport

                    jobGroup = creatorProcess(work=tempDict,
                                              jobCacheDir=self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({
                            'jobid': job['id'],
                            'cacheDir': job['cache_dir']
                        })
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]
                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(
                            jobDictList=nameDictList,
                            conn=myThread.transaction.conn,
                            transaction=True)
                except WMException:
                    raise
                except Exception as ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    self.sendAlert(6, msg=msg)
                    logging.debug(
                        "Error while setting bulkCache with following values: %s\n",
                        nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()

            # END: While loop over jobFactory

            # Close the jobFactory
            wmbsJobFactory.close()

        return
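Before the next example, a minimal, self-contained sketch of the "cap resource estimates" step used above, with plain dicts standing in for WMBS job groups; the key names and the limits structure are assumptions, not the WMCore API:

def cap_resource_estimates(job_groups, limits):
    """Clamp per-job estimates so no job advertises more than the glideinWMS caps."""
    for group in job_groups:
        for job in group["jobs"]:
            for key, cap in limits.items():
                if job.get(key, 0) > cap:
                    job[key] = cap  # cap the estimate, leave smaller values untouched

groups = [{"jobs": [{"estimatedJobTime": 200000, "estimatedMemoryUsage": 30000}]}]
cap_resource_estimates(groups, {"estimatedJobTime": 47 * 3600, "estimatedMemoryUsage": 16000})
print(groups[0]["jobs"][0])  # {'estimatedJobTime': 169200, 'estimatedMemoryUsage': 16000}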
Example #38
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscriptions for multiple top-level tasks
        # (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper._createSubscriptionsInWMBS(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
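A toy illustration of what workload truncation does to the task paths asserted in this test: the truncation task becomes the new top-level task under the new workload name. This sketches the naming rule only, not the WMWorkload.truncate() implementation:

def re_root(task_path, truncate_at, new_workload):
    """Re-root task_path so that truncate_at becomes the top-level task."""
    parent = truncate_at.rsplit("/", 1)[0]  # e.g. "/TestWorkload/ProcessingTask"
    assert task_path.startswith(truncate_at)
    return "/" + new_workload + task_path[len(parent):]

print(re_root("/TestWorkload/ProcessingTask/MergeTask",
              "/TestWorkload/ProcessingTask/MergeTask", "ResubmitTestWorkload"))
# /ResubmitTestWorkload/MergeTask
print(re_root("/TestWorkload/ProcessingTask/MergeTask/SkimTask",
              "/TestWorkload/ProcessingTask/MergeTask", "ResubmitTestWorkload"))
# /ResubmitTestWorkload/MergeTask/SkimTask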
Example #39
    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task and
        a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create jobs
        for each subscription in various states.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='site1', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN,
                           group_name='DEFAULT',
                           role_name='DEFAULT')

        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1",
                                    owner="Steve",
                                    name="Main",
                                    task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1",
                                         owner="Steve",
                                         name="Main",
                                         task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1",
                                       owner="Steve",
                                       name="Main",
                                       task="Cleanup")
        mainCleanupWorkflow.create()

        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [
            self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
            self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
            self.cleanupJobC
        ]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2",
                                 owner="Steve",
                                 name="Bogus",
                                 task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return
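The kill tests built on this setup essentially check one rule: jobs that are already done stay put, everything else gets killed. A hedged sketch of that rule (the state sets are an assumption inferred from the job states created above, not the actual killing code):

TERMINAL_STATES = {"complete", "cleanout", "exhausted"}

def states_after_kill(jobs):
    """Return the expected post-kill state for each job dict (assumed rule)."""
    return ["killed" if j["state"] not in TERMINAL_STATES else j["state"]
            for j in jobs]

print(states_after_kill([{"state": "new"}, {"state": "executing"}, {"state": "cleanout"}]))
# ['killed', 'killed', 'cleanout']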
Example #40
    def setUp(self):
        """
        _setUp_

        Create five filesets and matching RunBased subscriptions covering the
        interesting cases: multiple files, a single file, multiple runs, a
        single run, and a single run with multiple lumis.
        """

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", pnn="T2_CH_CERN")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(i, *[45 + i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations="T2_CH_CERN")
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileRunset = Fileset(name="TestFileset3")
        self.multipleFileRunset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(i / 3, *[45]))
            newFile.create()
            self.multipleFileRunset.addFile(newFile)
        self.multipleFileRunset.commit()

        self.singleRunFileset = Fileset(name="TestFileset4")
        self.singleRunFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleRunFileset.addFile(newFile)
        self.singleRunFileset.commit()

        self.singleRunMultipleLumi = Fileset(name="TestFileset5")
        self.singleRunMultipleLumi.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations="T2_CH_CERN")
            newFile.addRun(Run(1, *[45 + i]))
            newFile.create()
            self.singleRunMultipleLumi.addFile(newFile)
        self.singleRunMultipleLumi.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="mnorman",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.multipleRunSubscription = Subscription(
            fileset=self.multipleFileRunset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunSubscription = Subscription(
            fileset=self.singleRunFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunMultipleLumiSubscription = Subscription(
            fileset=self.singleRunMultipleLumi,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleRunSubscription.create()
        self.singleRunSubscription.create()
        self.singleRunMultipleLumiSubscription.create()

        return
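For orientation, the RunBased splitting exercised by these subscriptions boils down to grouping files by run number; a toy version with plain dicts (not the WMCore splitter API):

from collections import defaultdict

def group_by_run(files):
    """One job per run: collect the LFNs that share a run number."""
    jobs = defaultdict(list)
    for f in files:
        jobs[f["run"]].append(f["lfn"])
    return dict(jobs)

print(group_by_run([{"lfn": "a", "run": 1}, {"lfn": "b", "run": 1}, {"lfn": "c", "run": 2}]))
# {1: ['a', 'b'], 2: ['c']}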
Example #41
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
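One detail worth calling out from this test: the top-level fileset name follows a "<workload>-<task>-<block>" pattern. A one-liner capturing that convention (an observation from the assertions here, not a published API):

def top_level_fileset_name(workload, task, block):
    """Mirror the naming convention seen in the assertions above."""
    return "%s-%s-%s" % (workload, task, block)

print(top_level_fileset_name("TestWorkload", "ProcessingTask", "SomeBlock"))
# TestWorkload-ProcessingTask-SomeBlock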
Example #42
    def testGetOutputParentLFNs(self):
        """
        _testGetOutputParentLFNs_

        Verify that the getOutputDBSParentLFNs() method returns the correct
        parent LFNs.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         merged=True)
        testFileB = File(lfn="/this/is/a/lfnB",
                         size=1024,
                         events=10,
                         merged=True)
        testFileC = File(lfn="/this/is/a/lfnC",
                         size=1024,
                         events=10,
                         merged=False)
        testFileD = File(lfn="/this/is/a/lfnD",
                         size=1024,
                         events=10,
                         merged=False)
        testFileE = File(lfn="/this/is/a/lfnE",
                         size=1024,
                         events=10,
                         merged=True)
        testFileF = File(lfn="/this/is/a/lfnF",
                         size=1024,
                         events=10,
                         merged=True)
        testFileA.create()
        testFileB.create()
        testFileC.create()
        testFileD.create()
        testFileE.create()
        testFileF.create()

        testFileE.addChild(testFileC["lfn"])
        testFileF.addChild(testFileD["lfn"])

        testJobA = Job(name="TestJob", files=[testFileA, testFileB])
        testJobA["couch_record"] = "somecouchrecord"
        testJobA["location"] = "test.site.ch"
        testJobA.create(group=testJobGroup)
        testJobA.associateFiles()

        testJobB = Job(name="TestJobB", files=[testFileC, testFileD])
        testJobB["couch_record"] = "somecouchrecord"
        testJobB["location"] = "test.site.ch"
        testJobB.create(group=testJobGroup)
        testJobB.associateFiles()

        goldenLFNs = ["/this/is/a/lfnA", "/this/is/a/lfnB"]

        parentLFNs = testJobA.getOutputDBSParentLFNs()
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                "ERROR: Unknown lfn: %s" % parentLFN
            goldenLFNs.remove(parentLFN)

        assert len(goldenLFNs) == 0, \
            "ERROR: LFNs are missing: %s" % goldenLFNs

        goldenLFNs = ["/this/is/a/lfnE", "/this/is/a/lfnF"]

        parentLFNs = testJobB.getOutputDBSParentLFNs()
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                "ERROR: Unknown lfn: %s" % parentLFN
            goldenLFNs.remove(parentLFN)

        assert len(goldenLFNs) == 0, \
            "ERROR: LFNs are missing..."

        return
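The parentage rule this test encodes, as a standalone sketch (hypothetical helper, not the Job API): merged input files are their own DBS parents, while unmerged inputs contribute their merged parents instead:

def dbs_parent_lfns(input_files):
    """Resolve the DBS parent LFNs for a job's input files (assumed rule)."""
    parents = set()
    for f in input_files:
        if f["merged"]:
            parents.add(f["lfn"])         # merged inputs are their own parents
        else:
            parents.update(f["parents"])  # unmerged inputs defer to their parents
    return parents

print(dbs_parent_lfns([{"lfn": "/this/is/a/lfnC", "merged": False, "parents": ["/this/is/a/lfnE"]},
                       {"lfn": "/this/is/a/lfnD", "merged": False, "parents": ["/this/is/a/lfnF"]}]))
# {'/this/is/a/lfnE', '/this/is/a/lfnF'}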
class EventAwareLumiByWorkTest(unittest.TestCase):
    """
    _EventAwareLumiByWorkTest_

    Test event-aware, lumi-based job splitting.
    """
    def setUp(self):
        """
        _setUp_

        Set up the database schema, resource control sites and a dummy
        workflow shared by the splitting tests below.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.testInit.setSchema(customModules=["WMCore.WMBS"])

        self.splitterFactory = SplitterFactory(package="WMCore.JobSplitting")

        self.myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=logging,
                                     dbinterface=self.myThread.dbi)

        myResourceControl = ResourceControl()
        myResourceControl.insertSite("T1_US_FNAL", 1000, 2000,
                                     "T1_US_FNAL_Disk", "T1_US_FNAL")
        myResourceControl.insertSite("T2_CH_CERN", 1000, 2000, "T2_CH_CERN",
                                     "T2_CH_CERN")

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }
        # dummy workflow
        self.testWorkflow = Workflow(spec="spec.xml",
                                     owner="dmwm",
                                     name="testWorkflow",
                                     task="Test")
        self.testWorkflow.create()

        if PY3:
            self.assertItemsEqual = self.assertCountEqual

        return

    def tearDown(self):
        """
        _tearDown_
        """
        self.testInit.clearDatabase()

    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """
        baseName = makeUUID()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="dmwm",
                                name="testWorkflow_%s" % baseName[:4],
                                task="Test")
        testWorkflow.create()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T1_US_FNAL_Disk')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i),
                                          nEventsPerFile, i, lumisPerFile,
                                          'T2_CH_CERN')
                testFileset.addFile(newFile)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        return testSubscription

    @staticmethod
    def createFile(lfn, events, run, lumis, location, lumiMultiplier=None):
        """
        _createFile_

        Create a file for testing
        """
        if lumiMultiplier is None:
            lumiMultiplier = run
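        # With the default multiplier, run 2 with 5 lumis yields lumis 10..14,
        # so lumi numbers never collide across runs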

        newFile = File(lfn=lfn, size=1000, events=events)
        lumiList = []
        for lumi in range(lumis):
            lumiList.append((lumiMultiplier * lumis) + lumi)
        newFile.addRun(Run(run, *lumiList))
        newFile.setLocation(location)
        return newFile

    def _createThisSubscription(self, initialCounter=1):
        """
        Private function to create a fileset and subscription with
        different fileset and file names

        :param initialCounter: a simple integer appended to fileset and file names
        :return: a splitter instance (jobFactory)
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        testFileset = Fileset(name='Fileset%s' % initialCounter)

        fileA = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=800)
        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")

        initialCounter = int(initialCounter) + 1
        fileB = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=400)
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("T1_US_FNAL_Disk")

        initialCounter = int(initialCounter) + 1
        fileC = File(lfn="/this/is/file%s" % initialCounter,
                     size=1000,
                     events=500)
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("T1_US_FNAL_Disk")

        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        return jobFactory

    def testFileSplitting(self):
        """
        _testFileSplitting_

        Test that things work if we split files between jobs
        """
        splitter = SplitterFactory()

        oneSetSubscription = self.createSubscription(nFiles=10, lumisPerFile=1)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=oneSetSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=100,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        twoLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=2)
        jobFactory = splitter(package="WMCore.WMBS", subscription=twoLumiFiles)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=50,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)

        wholeLumiFiles = self.createSubscription(nFiles=5, lumisPerFile=3)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=wholeLumiFiles)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=67,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        # 10 because we split on run boundaries
        self.assertEqual(len(jobGroups[0].jobs), 10)
        jobList = jobGroups[0].jobs
        for job in jobList:
            # Half should have one file, half two
            self.assertTrue(len(job['input_files']) in [1, 2])

        jobLumiList = [jobList[i]['mask'].getRunAndLumis() for i in range(0, 10)]
        correctJobLumiList = [{0: [[0, 1]]}, {0: [[2, 2]]},
                              {1: [[3, 4]]}, {1: [[5, 5]]},
                              {4: [[12, 13]]}, {4: [[14, 14]]}]

        for lumiList in correctJobLumiList:
            self.assertIn(lumiList, jobLumiList)

        # Do it with multiple sites
        twoSiteSubscription = self.createSubscription(nFiles=5,
                                                      lumisPerFile=2,
                                                      twoSites=True)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=twoSiteSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=50,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 2)
        self.assertEqual(len(jobGroups[0].jobs), 10)
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job['input_files']), 1)
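The lumi masks asserted above follow from simple arithmetic, under the reading that a job closes on the lumi that reaches the per-job event target (my inference from the assertions, not the splitter's code):

import math

events_per_lumi = math.ceil(100 / 3.0)                  # 34: each 100-event file spans 3 lumis
lumis_per_job = math.ceil(67 / float(events_per_lumi))  # 2: 34 + 34 = 68 >= 67
print(lumis_per_job, math.ceil(3 / float(lumis_per_job)))  # 2 lumis/job, 2 jobs per 3-lumi file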

    def testNoRunNoFileSplitting(self):
        """
        _testNoRunNoFileSplitting_

        Test the splitting algorithm in the odd fringe cases that may come up.
        """
        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=60,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 9)
        self.enforceLimits(filesPerJob=2, jobsPerFile=3)

        # Assert that this works differently with file splitting on and run splitting on
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=60,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
        self.enforceLimits(filesPerJob=1, jobsPerFile=2, runsPerJob=1)

        # Test the total_events limit. (The algorithm cuts off after the lumi
        # that brings the total average event count over, or equal to, total_events.)
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=60,
                               total_events=10,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        # Make sure it has a lumi to process
        self.assertTrue(jobs[0]['mask'].getRunAndLumis())

        # Test the total event limit again
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=60,
                               total_events=179,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)

        # Test the total event limit on the boundary
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=60,
                               total_events=180,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)

        # Test the total event limit just past the boundary
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=60,
                               total_events=181,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)

        return
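A back-of-the-envelope check of the three total_events assertions above, using the cutoff rule stated in the comment (20 events per lumi, since each 100-event file spans 5 lumis; 60 events_per_job gives 3 lumis per job):

import math

events_per_lumi = 100 // 5              # 20
lumis_per_job = 60 // events_per_lumi   # 3
for total in (179, 180, 181):
    lumis = int(math.ceil(total / float(events_per_lumi)))      # cutoff lumi is included
    print(total, int(math.ceil(lumis / float(lumis_per_job))))  # 179 -> 3, 180 -> 3, 181 -> 4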

    def testFileSplitNoHardLimit(self):
        """
        _testFileSplitNoHardLimit_

        Simplest use case: there is only a limit of events per job, which the
        algorithm must adapt to on a file-by-file basis. At most one file per
        job, so we don't have to pass information between files.
        """
        splitter = SplitterFactory()

        # Create 5 files with 7 lumi per file and 100 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=700)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # First test, the target is 360 events per job. With 100 events per
        # lumi, a job is closed on the lumi that reaches the target (4 lumis =
        # 400 events), so each 7-lumi file yields two jobs (4 + 3 lumis).
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)

        # Now set the average to 200 events per job
        # This results in the algorithm reducing the lumis per job to 2
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=700)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=200,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 20)

        # Check the extremes: process zero-event files with lumis. Each file
        # must end up in its own job
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=100,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=5000,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 0)
        # since there aren't enough events, we close this fileset to get it moving
        fileset = testSubscription.getFileset()
        fileset.markOpen(False)

        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               events_per_job=5000,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        # Process files with 10k events per lumi: fall back to one lumi per
        # job, we can't do better than that
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=5,
                                                   twoSites=False,
                                                   nEventsPerFile=50000)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=5000,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 25)

        # Test the total_events limit. (The algorithm cuts off after the lumi
        # that brings the total average event count over, or equal to, total_events.)
        testSubscription = self.createSubscription(nFiles=5,
                                                   lumisPerFile=3,
                                                   twoSites=False,
                                                   nEventsPerFile=300)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=250,
                               total_events=750,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)
        self.enforceLimits(filesPerJob=1, jobsPerFile=2, runsPerJob=1)

        return
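To tie the scenarios above together, here is the per-file job count implied by the assertions (a hedged model of the splitter, not its actual code): a job closes on the lumi whose addition reaches the per-job event target.

import math

def jobs_per_file(lumis_in_file, events_per_job, events_per_lumi):
    """Model: close a job on the lumi that reaches/crosses events_per_job."""
    lumis_per_job = max(1, int(math.ceil(events_per_job / float(events_per_lumi))))
    return int(math.ceil(lumis_in_file / float(lumis_per_job)))

print(jobs_per_file(7, 360, 100))     # 2 jobs per file (4 + 3 lumis) -> 10 jobs for 5 files
print(jobs_per_file(7, 200, 100))     # 4 jobs per file -> 20 jobs
print(jobs_per_file(5, 5000, 10000))  # 5 jobs per file -> 25 jobs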

    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # First test, the optimal settings are 360 events per job. As we have files with 0 events per lumi, this will
        # configure the splitting to a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 0)

        # since there are not enough events to be split, let's close the fileset and get it going
        fileset = testSubscription.getFileset()
        fileset.markOpen(False)

        # One job in one job group with 100 files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2,
                                    "T1_US_FNAL_Disk")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1,
                                    "T1_US_FNAL_Disk")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1,
                                    "T1_US_FNAL_Disk")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1,
                                    "T1_US_FNAL_Disk")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files: one with 3 runs, and a second one sharing the last run of the first
        fileA = File(lfn="/this/is/file10", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("T2_CH_CERN")

        fileB = self.createFile('/this/is/file11', 200, 3, 5, "T2_CH_CERN")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)
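The six-job expectation above can be checked with a small accumulator that models the splitOnRun behaviour (again an inference from the assertions: runs are processed independently and a job closes on the lumi that reaches the target):

def jobs_in_run(lumi_events, target):
    """Count jobs for one run's lumis, closing a job once it reaches target events."""
    jobs, acc = 0, 0
    for events in lumi_events:
        acc += events
        if acc >= target:
            jobs, acc = jobs + 1, 0
    return jobs + (1 if acc else 0)  # leftover lumis form a final job

run3 = [100] * 8 + [40] * 5  # fileA's run-3 lumis plus fileB's five 40-event lumis
print(jobs_in_run([100] * 8, 700), jobs_in_run(run3, 700))  # 2 2; runs 1 and 2 give 2 each -> 6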

    def testHardLimitSplitting(self):
        """
        _testHardLimitSplitting_

        Test that we can specify an event limit; the algorithm shall take
        single-lumi files with more events than the limit and mark them for
        failure.
        """
        splitter = SplitterFactory()

        # Create 3 files, the one in the middle is a "bad" file
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 1000, 0, 5,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file2", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file3", 1000, 2, 2,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Settings are to split on file boundaries, to fail single lumis whose
        # estimated runtime exceeds the 9600 s job_time_limit (i.e. more than
        # 800 events at 12 s/event) and to put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
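        # file1 (5 lumis of 200 events) splits into 2 jobs, the single 1000-event
        # lumi of file2 becomes 1 job failed on creation, and file3 (2 lumis of
        # 500 events) splits into 2 more: 5 jobs in total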
        self.assertEqual(len(jobs), 5)

        # One job should be failed, the rest should be fine
        for jobNum in (0, 1, 3, 4):
            self.assertFalse(jobs[jobNum].get('failedOnCreation'))
        self.assertTrue(jobs[2]['failedOnCreation'])
        self.assertEqual(
            jobs[2]['failedReason'],
            'File /this/is/file2 has a single lumi 1, in run 1 with too many events 1000 and it would take 12000 sec to run'
        )

        return

    def testHardLimitSplittingOnly(self):
        """
        _testHardLimitSplittingOnly_

        Checks that we can split a set of files where every file has a single
        lumi too big to fit in a runnable job
        """
        splitter = SplitterFactory()

        # Create 3 single-big-lumi files
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file0", 1000, 0, 1,
                                    "T1_US_FNAL_Disk")
        testFileB = self.createFile("/this/is/file1", 1000, 1, 1,
                                    "T1_US_FNAL_Disk")
        testFileC = self.createFile("/this/is/file2", 1000, 2, 1,
                                    "T1_US_FNAL_Disk")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Fail single lumis with more than 800 events and put 550 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=True,
                               events_per_job=550,
                               job_time_limit=9600,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
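        # Each file is a single 1000-event lumi: at 12 s/event that is an
        # estimated 12000 s, over the 9600 s job_time_limit, so every job is
        # created in a failed state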
        self.assertEqual(len(jobs), 3)
        for i in range(3):
            num = list(jobs[i]['mask']['runAndLumis'])[0]
            self.assertTrue(jobs[i]['failedOnCreation'])
            error = 'File /this/is/file%s has a single lumi %s, in run %s' % (
                num, num, num)
            error += ' with too many events 1000 and it would take 12000 sec to run'
            self.assertEqual(jobs[i]['failedReason'], error)

        return

    def testLumiMask(self):
        """
        _testLumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("T1_US_FNAL_Disk")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("T1_US_FNAL_Disk")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
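        # The mask keeps 9 lumis (5 in run 1, 2 in run 2, 2 in run 4) at 100
        # events each, i.e. 900 events, which at 850 events per job yields 2 jobs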
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        processedLumis = LumiList()
        for job in jobs:
            processedLumis += LumiList(
                compactList=job['mask'].getRunAndLumis())
        correctLumis = LumiList(compactList={
            1: [[10, 14]],
            2: [[20, 21]],
            4: [[40, 41]]
        })
        self.assertEqual(processedLumis.getCMSSWString(),
                         correctLumis.getCMSSWString())

    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Test that we can use a run white list to filter good runs/lumis.
        """
        jobFactory = self._createThisSubscription(1)

        # Split with no breaks
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 4])

        # Re-split with a break on runs
        jobFactory = self._createThisSubscription(11)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 3, 4])

        # Re-split with a break on files
        jobFactory = self._createThisSubscription(111)

        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis().keys():
                self.assertIn(run, [1, 2, 3])

    def testLumiMaskAndWhitelist(self):
        """
        _testLumiMaskAndWhitelist_

        Test that we can combine a lumi-mask and a run whitelist to filter
        good runs/lumis.
        """
        jobFactory = self._createThisSubscription(1)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {
            1: [[10, 14]],
            4: [[40, 41]]
        })

    def testLumiCorrections(self):
        """
        _testLumiCorrections_

        Test the splitting algorithm can handle lumis which cross multiple files.
        No need for applyLumiCorrection=True
        """

        splitter = SplitterFactory()
        testSubscription = self.createSubscription(nFiles=2,
                                                   lumisPerFile=2,
                                                   twoSites=False,
                                                   nEventsPerFile=150)
        files = testSubscription.getFileset().getFiles()
        self.assertEqual(len(files), 2)

        # Two files with 2 lumis each: file0 has run0 and lumis 0,1 - file1 has run1 and lumis 2,3 - each 150 events
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(events_per_job=50,
                               halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               performance=self.performanceParams)

        # The splitting algorithm will assume 75 events per lumi so we will have one job per lumi
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)

        # Recreate the same subscription as before, but with duplicated lumis
        testFileset = Fileset(name='FilesetA')
        fileA = File(lfn="/this/is/file1", size=1000, events=150)
        lumiListA = [0, 1, 42]
        fileA.addRun(Run(0, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")
        testFileset.addFile(fileA)

        fileB = File(lfn="/this/is/file2", size=1000, events=150)
        lumiListB = [2, 3, 42]
        fileB.addRun(Run(0, *lumiListB))
        fileB.setLocation("T1_US_FNAL_Disk")
        testFileset.addFile(fileB)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=50,
                               halt_job_on_file_boundaries=True,
                               performance=self.performanceParams)

        # Now we will have: file0: Run0 and lumis [0, 1, 42] file1: Run0 and lumis [2, 3, 42]
        # With 50 events per lumi, one job per lumi, one will have two files on lumi 42
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 5)

        n1files = 0
        n2files = 0
        lumi1files = []
        lumi2files = []

        for job in jobs:
            runLumis = job['mask'].getRunAndLumis()
            lumis = runLumis[0]
            self.assertEqual(len(runLumis), 1)
            self.assertEqual(len(lumis), 1)
            self.assertEqual(lumis[0][0],
                             lumis[0][1])  # Make sure only one lumi per job
            if len(job['input_files']) == 1:
                n1files += 1
                lumi1files.append(lumis[0][0])
            elif len(job['input_files']) == 2:
                n2files += 1
                lumi2files.append(lumis[0][0])
            else:
                self.fail("At least one job has a number of input files other than 1 or 2")

        self.assertEqual(n1files, 4)
        self.assertEqual(n2files, 1)
        self.assertItemsEqual(lumi1files, [0, 1, 2, 3])
        self.assertItemsEqual(lumi2files, [42])

    def test_NotEnoughEvents(self):
        """
        _test_NotEnoughEvents_

        Checks that jobs are not created when there are not enough events
        (spread over too few files) for the events_per_job passed to the
        splitter algorithm
        """
        splitter = SplitterFactory()

        # Very small fileset (single file) without enough events
        testSubscription = self.createSubscription(nFiles=1,
                                                   lumisPerFile=2,
                                                   nEventsPerFile=200)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=500,
                               performance=self.performanceParams,
                               splitOnRun=False)
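        # 200 events in total fall short of events_per_job=500, so no job
        # group is created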

        self.assertEqual(len(jobGroups), 0)

        # Still a small fileset (two files) without enough events
        testSubscription = self.createSubscription(nFiles=2,
                                                   lumisPerFile=2,
                                                   nEventsPerFile=200)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=500,
                               performance=self.performanceParams,
                               splitOnRun=False)

        self.assertEqual(len(jobGroups), 0)

        # Finally an acceptable fileset size (three files) with enough events
        testSubscription = self.createSubscription(nFiles=3,
                                                   lumisPerFile=2,
                                                   nEventsPerFile=200)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=500,
                               performance=self.performanceParams,
                               splitOnRun=False)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
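        # 600 events at 500 events per job: the first job takes 5 lumis
        # (500 events) spanning all three files, the second takes the one
        # remaining lumi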
        self.assertEqual(len(jobs), 2)
        self.assertEqual(len(jobs[0]['input_files']), 3)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        # The run/lumi distribution across the jobs can vary between splitter
        # runs, so compare the aggregated runs and lumis instead
        runs, lumis = [], []
        for jobNum in range(2):
            runs.extend(jobs[jobNum]['mask'].getRunAndLumis().keys())
            for lumiRanges in jobs[jobNum]['mask'].getRunAndLumis().values():
                for lumiRange in lumiRanges:
                    lumis.extend(range(lumiRange[0], lumiRange[1] + 1))
        self.assertItemsEqual(set(runs), [0, 1, 2])
        self.assertItemsEqual(lumis, [0, 1, 2, 3, 4, 5])

        # Test fileset with a single run and splitOnRun=True
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1",
                                    200,
                                    1,
                                    2,
                                    "T1_US_FNAL_Disk",
                                    lumiMultiplier=0)
        testFileB = self.createFile("/this/is/file2",
                                    200,
                                    1,
                                    2,
                                    "T1_US_FNAL_Disk",
                                    lumiMultiplier=1)
        testFileC = self.createFile("/this/is/file3",
                                    200,
                                    1,
                                    2,
                                    "T1_US_FNAL_Disk",
                                    lumiMultiplier=2)
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(events_per_job=500,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
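        # All six lumis belong to run 1, so with splitOnRun the first job takes
        # lumis 0-4 (500 events) and the second takes the remaining lumi 5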
        self.assertEqual(len(jobs), 2)
        self.assertEqual(len(jobs[0]['input_files']), 3)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        self.assertItemsEqual(jobs[0]['mask'].getRunAndLumis(), {1: [[0, 4]]})
        self.assertItemsEqual(jobs[1]['mask'].getRunAndLumis(), {1: [[5, 5]]})

        return

    def enforceLimits(self,
                      jobs=None,
                      filesPerJob=None,
                      jobsPerFile=None,
                      runsPerJob=None):
        """
        Args:
            jobs: the list of jobs to check
            filesPerJob: The maximum number of files that can be in one job
            jobsPerFile: The maximum number of jobs that a particular file can appear in
            runsPerJob: The maximum number of runs that can be in a job

        Returns: nothing

        Check the various limits on job and file distribution, raise assertion errors if they are violated
        """

        jobs = jobs or []
        jobsPerFileMap = Counter()
        for job in jobs:
            if filesPerJob:
                self.assertLessEqual(len(job['input_files']), filesPerJob)
            if runsPerJob:
                self.assertLessEqual(len(job['mask'].getRunAndLumis()),
                                     runsPerJob)
            for f in job['input_files']:
                jobsPerFileMap[f['lfn']] += 1
        for lfn in jobsPerFileMap:
            if jobsPerFile:
                self.assertLessEqual(jobsPerFileMap[lfn], jobsPerFile)

        return
Example #44
0
    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        __createSubscriptionsInWMBS_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        #FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()
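        # Register the task's site whitelist and blacklist on the subscription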
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s" %
                         subscription["id"])
        else:
            logging.info("Child subscription created: %s" % subscription["id"])

        outputModules = task.getOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                outputFileset = Fileset(
                    self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            mergedOutputFileset = Fileset(
                                self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset is not None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        self._createSubscriptionsInWMBS(
                            childTask, outputFileset, alternativeFilesetClose)

                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription
Example #45
0
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        locationAction.execute("site1", seName="somese.cern.ch")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow",
                                 spec="input",
                                 owner="Steve",
                                 task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("someOutput", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("someOutput2", self.bogusFileset,
                                self.bogusMergedFileset)

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        badFile1 = File(lfn="badFile1",
                        size=10241024,
                        events=10241024,
                        first_event=0,
                        locations=set(["somese.cern.ch"]))
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["somese.cern.ch"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["somese.cern.ch"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["somese.cern.ch"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations=set(["somese.cern.ch"]))
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["somese.cern.ch"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["somese.cern.ch"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["somese.cern.ch"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["somese.cern.ch"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations=set(["somese.cern.ch"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations=set(["somese.cern.ch"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV",
                      size=1024,
                      events=1024,
                      first_event=3072,
                      locations=set(["somese.cern.ch"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["somese.cern.ch"]))
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["somese.cern.ch"]))
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["somese.cern.ch"]))
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for wmbsFile in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV, fileX, fileY, fileZ, badFile1
        ]:
            self.mergeFileset.addFile(wmbsFile)
            self.bogusFileset.addFile(wmbsFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
Example #46
0
    def createJobs(self):
        """
        _createJobs_

        Create test jobs in WMBS and BossAir
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "tapas",
                                name = makeUUID(), task = "Test")
        testWorkflow.create()

        testFilesetA = Fileset(name = "TestFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name = "TestFilesetB")
        testFilesetB.create()
        testFilesetC = Fileset(name = "TestFilesetC")
        testFilesetC.create()

        testFileA = File(lfn = "testFileA", locations = set(["testSE1", "testSE2"]))
        testFileA.create()
        testFilesetA.addFile(testFileA)
        testFilesetA.commit()
        testFilesetB.addFile(testFileA)
        testFilesetB.commit()
        testFilesetC.addFile(testFileA)
        testFilesetC.commit()

        testSubscriptionA = Subscription(fileset = testFilesetA,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscriptionA.create()
        testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1", "valid": True}])
        testSubscriptionB = Subscription(fileset = testFilesetB,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscriptionB.create()
        testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1", "valid": False}])
        testSubscriptionC = Subscription(fileset = testFilesetC,
                                        workflow = testWorkflow,
                                        type = "Merge")
        testSubscriptionC.create()

        testJobGroupA = JobGroup(subscription = testSubscriptionA)
        testJobGroupA.create()
        testJobGroupB = JobGroup(subscription = testSubscriptionB)
        testJobGroupB.create()
        testJobGroupC = JobGroup(subscription = testSubscriptionC)
        testJobGroupC.create()

        # Site 1, Has been assigned a location and is complete.
        testJobA = Job(name = "testJobA", files = [testFileA])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroupA)
        testJobA["state"] = "success"

        # Site 1, Has been assigned a location and is incomplete.
        testJobB = Job(name = "testJobB", files = [testFileA])
        testJobB["couch_record"] = makeUUID()
        testJobB["cache_dir"] = self.tempDir
        testJobB.create(group = testJobGroupA)
        testJobB["state"] = "executing"
        runJobB = RunJob()
        runJobB.buildFromJob(testJobB)
        runJobB["status"] = "PEND"

        # Does not have a location, white listed to site 1
        testJobC = Job(name = "testJobC", files = [testFileA])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroupA)
        testJobC["state"] = "new"

        # Site 2, Has been assigned a location and is complete.
        testJobD = Job(name = "testJobD", files = [testFileA])
        testJobD["couch_record"] = makeUUID()
        testJobD.create(group = testJobGroupB)
        testJobD["state"] = "success"

        # Site 2, Has been assigned a location and is incomplete.
        testJobE = Job(name = "testJobE", files = [testFileA])
        testJobE["couch_record"] = makeUUID()
        testJobE.create(group = testJobGroupB)
        testJobE["state"] = "executing"
        runJobE = RunJob()
        runJobE.buildFromJob(testJobE)
        runJobE["status"] = "RUN"

        # Does not have a location, site 1 is blacklisted.
        testJobF = Job(name = "testJobF", files = [testFileA])
        testJobF["couch_record"] = makeUUID()
        testJobF.create(group = testJobGroupB)
        testJobF["state"] = "new"

        # Site 3, Has been assigned a location and is complete.
        testJobG = Job(name = "testJobG", files = [testFileA])
        testJobG["couch_record"] = makeUUID()
        testJobG.create(group = testJobGroupC)
        testJobG["state"] = "cleanout"

        # Site 3, Has been assigned a location and is incomplete.
        testJobH = Job(name = "testJobH", files = [testFileA])
        testJobH["couch_record"] = makeUUID()
        testJobH.create(group = testJobGroupC)
        testJobH["state"] = "new"

        # Site 3, Does not have a location.
        testJobI = Job(name = "testJobI", files = [testFileA])
        testJobI["couch_record"] = makeUUID()
        testJobI.create(group = testJobGroupC)
        testJobI["state"] = "new"

        # Site 3, Does not have a location and is in cleanout.
        testJobJ = Job(name = "testJobJ", files = [testFileA])
        testJobJ["couch_record"] = makeUUID()
        testJobJ.create(group = testJobGroupC)
        testJobJ["state"] = "cleanout"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC, testJobD,
                                          testJobE, testJobF, testJobG, testJobH,
                                          testJobI, testJobJ])

        self.insertRunJob.execute([runJobB, runJobE])

        setLocationAction = self.daoFactory(classname = "Jobs.SetLocation")
        setLocationAction.execute(testJobA["id"], "testSite1")
        setLocationAction.execute(testJobB["id"], "testSite1")
        setLocationAction.execute(testJobD["id"], "testSite1")
        setLocationAction.execute(testJobE["id"], "testSite2")
        setLocationAction.execute(testJobG["id"], "testSite1")
        setLocationAction.execute(testJobH["id"], "testSite1")

        return
Example #47
0
    def populateWMBS(self):
        """
        _populateWMBS_

        Create files and subscriptions in WMBS
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName='s1', pnn="T1_US_FNAL_Disk")
        locationAction.execute(siteName='s2', pnn="T2_CH_CERN")
        self.validLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")
        testWorkflow.create()

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size=1000, events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for _ in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        self.multipleFileSubscription.create()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size=1000, events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   workflow=testWorkflow,
                                                   split_algo="EventBased",
                                                   type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("T1_US_FNAL_Disk")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["T1_US_FNAL_Disk", "T2_CH_CERN"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()
        self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        self.multipleSiteSubscription.create()

        return
Example #48
0
    def createTestJobGroup(self,
                           config,
                           name="TestWorkthrough",
                           filesetName="TestFileset",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           type="Processing"):
        """
        Creates a group of several jobs

        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation,
                                owner=self.OWNERDN,
                                name=name,
                                task=task,
                                owner_vogroup="",
                                owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names=[name], injected=True)

        testWMBSFileset = Fileset(name=filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=type)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for _ in range(self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename=path1)
        report2.load(filename=path2)

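        # Walk the jobs through two failure/retry cycles: the first failure
        # cools off and retries, the second exhausts the retries and the jobs
        # end in cleanout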
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
Example #49
0
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in range(1, 5):
            insertLumiDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = fileset1,
                                           workflow = workflow1,
                                           split_algo = "Express",
                                           type = "Express")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "ExpressMerge",
                                           type = "ExpressMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
        self.splitArgs['maxInputFiles'] = 500
        self.splitArgs['maxLatency'] = 15 * 23

        return
Example #50
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.addRun(Run(i, *[45]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleSiteSubscription.create()

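        # Performance figures used by the splitters to estimate job runtime
        # (12 s/event), memory requirement and output size per event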
        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }
        return
Example #51
0
    def testPromptRecoWithSkims(self):
        """
        _testPromptRecoWithSkims_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        self.setupPromptSkimConfigObject()
        testArguments = getTestArguments()
        testArguments["PromptSkims"] = [self.promptSkim]
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnvPath"] = os.environ.get("EnvPath", None)
        testArguments["BinPath"] = os.environ.get("BinPath", None)

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()),
                         len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = [
            "write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                    goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        promptSkimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
        promptSkimWorkflow.load()

        self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" %
                                     goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s"
                % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

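        # With the workflows verified, check the subscriptions that tie each
        # fileset to its workflow, starting from the top-level input fileset.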
        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            recoSubscription["split_algo"], "EventBased",
            "Error: Wrong split algorithm. %s" %
            recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedRecoFileset = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
        mergedRecoFileset.loadData()

        promptSkimSubscription = Subscription(fileset=mergedRecoFileset,
                                              workflow=promptSkimWorkflow)
        promptSkimSubscription.loadData()

        self.assertEqual(promptSkimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            promptSkimSubscription["split_algo"], "FileBased",
            "Error: Wrong split algorithm. %s" %
            promptSkimSubscription["split_algo"])

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                       unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                             workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

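        # These merge tasks run over files whose parentage is presumably not
        # tracked here, which is why ParentlessMergeBySize is expected below.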
        unmergedOutputs = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for unmergedOutput in unmergedOutputs:
            unmergedPromptSkim = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % unmergedOutput)
            unmergedPromptSkim.loadData()
            promptSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s"
                % unmergedOutput)
            promptSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedPromptSkim,
                                             workflow=promptSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "ParentlessMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = [
            "write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                      goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s"
                % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                cleanupSubscription["split_algo"], "SiblingProcessingBased",
                "Error: Wrong split algorithm. %s" %
                cleanupSubscription["split_algo"])

        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect,
                                     workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        promptSkimLogCollect = Fileset(
            name=
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive"
        )
        promptSkimLogCollect.loadData()
        promptSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect"
        )
        promptSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=promptSkimLogCollect,
                                     workflow=promptSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(
                name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" %
                (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=recoMergeLogCollect,
                workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"],
                             "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=alcaSkimLogCollect,
                workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"],
                             "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            promptSkimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod)
            promptSkimMergeLogCollect.loadData()
            promptSkimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            promptSkimMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=promptSkimMergeLogCollect,
                workflow=promptSkimMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"],
                             "MinFileBased",
                             "Error: Wrong split algorithm.")

        return
Example #52
    def testFailJobInput(self):
        """
        _testFailJobInput_

        Test the Jobs.FailInput DAO and verify that it doesn't affect other
        jobs/subscriptions that run over the same files.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileA.create()
        testFileB.create()
        testFileC.create()

        testFileset.addFile([testFileA, testFileB, testFileC])
        bogusFileset.addFile([testFileA, testFileB, testFileC])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.completeFiles([testFileA, testFileB, testFileC])
        bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA",
                       files=[testFileA, testFileB, testFileC])
        testJobB = Job(name="TestJobB",
                       files=[testFileA, testFileB, testFileC])

        bogusJob = Job(name="BogusJob",
                       files=[testFileA, testFileB, testFileC])

        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)

        bogusJob.create(group=bogusJobGroup)

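        # failInputFiles() should mark the input files as Failed for this
        # job's subscription only; the bogus subscription must be untouched.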
        testJobA.failInputFiles()
        testJobB.failInputFiles()

        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

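        # Move one job into cleanout and fail the input files again; the
        # file status counts should be unaffected.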
        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        testJobB["state"] = "cleanout"
        changeStateAction.execute([testJobB])

        # Try again

        testJobA.failInputFiles()

        # The files should remain marked as Failed
        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

        # bogus should be unchanged
        self.assertEqual(len(bogusSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Acquired")), 3)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Failed")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Completed")), 0)

        return
Example #53
    def createGiantJobSet(self,
                          name,
                          config,
                          nSubs=10,
                          nJobs=10,
                          nFiles=1,
                          spec="spec.xml"):
        """
        Creates a massive set of jobs

        """

        jobList = []

        for i in range(0, nSubs):
            # Make a bunch of subscriptions
            localName = '%s-%i' % (name, i)
            testWorkflow = Workflow(spec=spec,
                                    owner=self.OWNERDN,
                                    name=localName,
                                    task="Test",
                                    owner_vogroup="",
                                    owner_vorole="")
            testWorkflow.create()

            testWMBSFileset = Fileset(name=localName)
            testWMBSFileset.create()

            testSubscription = Subscription(fileset=testWMBSFileset,
                                            workflow=testWorkflow)
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            filesToComplete = []

            for j in range(0, nJobs):
                # Create jobs for each subscription
                testFileA = File(lfn="%s-%i-lfnA" % (localName, j),
                                 size=1024,
                                 events=10)
                testFileA.addRun(Run(10, *range(11, 41)))
                testFileA.setLocation('malpaquet')
                testFileA.create()

                testWMBSFileset.addFile(testFileA)
                testWMBSFileset.commit()

                filesToComplete.append(testFileA)

                testJob = Job(name='%s-%i' % (localName, j))
                testJob.addFile(testFileA)
                testJob['retry_count'] = 1
                testJob['retry_max'] = 10
                testJobGroup.add(testJob)
                jobList.append(testJob)

                for k in range(0, nFiles):
                    # Create output files
                    testFile = File(lfn="%s-%i-output" % (localName, k),
                                    size=1024,
                                    events=10)
                    testFile.addRun(Run(10, *[12312]))
                    testFile.setLocation('malpaquet')
                    testFile.create()

                    testJobGroup.output.addFile(testFile)

                testJobGroup.output.commit()

            testJobGroup.commit()

            changer = ChangeState(config)

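            # Walk every job through the full success lifecycle:
            # new -> created -> executing -> complete -> success -> cleanout.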
            changer.propagate(testJobGroup.jobs, 'created', 'new')
            changer.propagate(testJobGroup.jobs, 'executing', 'created')
            changer.propagate(testJobGroup.jobs, 'complete', 'executing')
            changer.propagate(testJobGroup.jobs, 'success', 'complete')
            changer.propagate(testJobGroup.jobs, 'cleanout', 'success')

            testWMBSFileset.markOpen(0)

            testSubscription.completeFiles(filesToComplete)

        return jobList
Example #54
    def testPromptReco(self):
        """
        _testPromptReco_

        Create a Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = getTestArguments()

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()),
                         len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = [
            "write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                    goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

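        # The DQM tier additionally gets a harvesting task attached to its
        # merge task; verify its logArchive filesets as well.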
        dqmWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged"
        )
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" %
                                     goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            recoSubscription["split_algo"], "EventBased",
            "Error: Wrong split algorithm. %s" %
            recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                       workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                       unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                             workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = [
            "write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                      goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algorithm.")

        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect,
                                     workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(
                name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" %
                (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=recoMergeLogCollect,
                workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"],
                             "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=alcaSkimLogCollect,
                workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSubscription["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSubscription["split_algo"],
                             "MinFileBased",
                             "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(
            name=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive"
        )
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect"
        )
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example #55
    def createTestJobGroup(self,
                           name="TestWorkthrough",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           nJobs=10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation,
                                owner="Simon",
                                name=name,
                                task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn=makeUUID(), size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn=makeUUID(), size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        report = Report()
        if error:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "badBackfillJobReport.pkl")
        else:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "PerformanceReport2.pkl")
        report.load(filename=path)

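        # Drive the jobs through the failure lifecycle, attaching a real
        # FWJR before the jobfailed transition is recorded.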
        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
Example #57
0
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        # Runs entirely outside the mask are expected to be filtered away
        # without raising, even when duplicated; the result is discarded.
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
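For orientation, the mask behaves like a mapping from run number to a list of inclusive [first, last] lumi ranges, which is what getRunAndLumis() returned above. The snippet below is a minimal, self-contained approximation of the filtering semantics exercised by filterRunLumisByMask; it is not the WMCore implementation.

    # Approximate mask-filtering semantics in plain Python:
    mask = {100: [[101, 102]], 200: [[201, 202]]}

    def filterLumis(runNumber, lumis, mask):
        ranges = mask.get(runNumber, [])
        return [lumi for lumi in lumis
                if any(low <= lumi <= high for low, high in ranges)]

    assert filterLumis(100, [101, 102, 103, 104], mask) == [101, 102]
    assert filterLumis(300, [1001, 1002], mask) == []  # run not in mask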
Example #58
0
    def verifyKeepAOD(self):
        """
        _verifyKeepAOD_

        Verify that a workflow that only produces AOD in a single step was
        installed correctly into WMBS.
        """
        topLevelFileset = Fileset(name="TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepTwoUnmergedAODFileset = Fileset(name="/TestWorkload/StepOneProc/unmerged-aodOutputModule")
        stepTwoUnmergedAODFileset.loadData()
        stepTwoMergedAODFileset = Fileset(name="/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-Merged")
        stepTwoMergedAODFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name="/TestWorkload/StepOneProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeAODLogArchiveFileset = Fileset(name="/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-logArchive")
        stepTwoMergeAODLogArchiveFileset.loadData()

        stepTwoWorkflow = Workflow(spec="somespec", name="TestWorkload",
                                   task="/TestWorkload/StepOneProc")
        stepTwoWorkflow.load()
        self.assertTrue("aodOutputModule" in stepTwoWorkflow.outputMap,
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepTwoUnmergedAODFileset.id,
                         "Error: AOD output fileset is wrong.")
        stepTwoSub = Subscription(workflow=stepTwoWorkflow, fileset=topLevelFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap:
            self.assertEqual(len(stepTwoWorkflow.outputMap[outputMod]), 1,
                             "Error: more than one destination for output mod.")

        stepTwoCleanupAODWorkflow = Workflow(spec="somespec", name="TestWorkload",
                                             task="/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedaodOutputModule")
        stepTwoCleanupAODWorkflow.load()
        self.assertEqual(len(stepTwoCleanupAODWorkflow.outputMap), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupAODSub = Subscription(workflow=stepTwoCleanupAODWorkflow, fileset=stepTwoUnmergedAODFileset)
        stepTwoCleanupAODSub.loadData()
        self.assertEqual(stepTwoCleanupAODSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec="somespec", name="TestWorkload",
                                             task="/TestWorkload/StepOneProc/LogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow=stepTwoLogCollectWorkflow, fileset=stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeAODWorkflow = Workflow(spec="somespec", name="TestWorkload",
                                           task="/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule")
        stepTwoMergeAODWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeAODWorkflow.outputMap,
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeAODWorkflow.outputMap,
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeAODLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepTwoMergeAODSub = Subscription(workflow=stepTwoMergeAODWorkflow, fileset=stepTwoUnmergedAODFileset)
        stepTwoMergeAODSub.loadData()
        self.assertEqual(stepTwoMergeAODSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeAODWorkflow.outputMap:
            self.assertEqual(len(stepTwoMergeAODWorkflow.outputMap[outputMod]), 1,
                             "Error: more than one destination for output mod.")
        return
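The per-workflow checks above repeat one pattern: every output module must map to exactly one destination. A small helper could factor that out; the method below is a sketch with an invented name, not code from the original suite.

    # Hypothetical helper for the repeated output-map assertions:
    def assertSingleDestinations(self, workflow):
        for outputMod, destinations in workflow.outputMap.items():
            self.assertEqual(len(destinations), 1,
                             "Error: more than one destination for %s." % outputMod)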
Example #59
0
    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert a dummy workflow, two filesets, files from two runs and merge
        subscriptions into WMBS to test merge job creation.  Every file is
        added to both the merge fileset and a bogus fileset, so a test can
        verify that job creation only picks up files through the correct
        subscription.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", pnn="somese.cern.ch")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL,
                                 owner="mnorman",
                                 name=name,
                                 task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        dummySubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        # Persist the bogus subscription as well; without create() it never
        # reaches WMBS and the bogus fileset's files go untracked.
        dummySubscription.create()

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations={"somese.cern.ch"})
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations={"somese.cern.ch"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations={"somese.cern.ch"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV",
                      size=1024 * 1000000,
                      events=1024,
                      first_event=3072,
                      locations={"somese.cern.ch"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for fileObj in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV
        ]:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return
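A sketch of how a merge test might consume this fixture, assuming WMCore's standard SplitterFactory pattern. Here mergeSub stands in for the "mergeFileset" subscription created above (a real test would reload it from WMBS), and the size limits are illustrative only.

    # Hypothetical caller of stuffWMBS:
    from WMCore.JobSplitting.SplitterFactory import SplitterFactory

    def testMergeBySize(self):
        self.stuffWMBS(workflowURL="spec.xml", name="wf001")
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=mergeSub)
        jobGroups = jobFactory(min_merge_size=4096,
                               max_merge_size=999999999,
                               max_merge_events=999999999)
        # Assertions on len(jobGroups) and the grouping of the eleven
        # files (one of them oversized) would follow here.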
Example #60
0
    def testCompleteJobInput(self):
        """
        _testCompleteJobInput_

        Verify the correct output of the CompleteInput DAO.  This should mark
        the input for a job as complete once all the jobs that run over a
        particular file have completed successfully.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileA.create()
        testFileB.create()
        testFileC.create()

        testFileset.addFile([testFileA, testFileB, testFileC])
        bogusFileset.addFile([testFileA, testFileB, testFileC])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB, testFileC])
        bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA", files=[testFileA])
        testJobB = Job(name="TestJobB", files=[testFileA, testFileB])
        testJobC = Job(name="TestJobC", files=[testFileC])
        bogusJob = Job(name="BogusJob",
                       files=[testFileA, testFileB, testFileC])
        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)
        testJobC.create(group=testJobGroup)
        bogusJob.create(group=bogusJobGroup)

        testJobA["outcome"] = "success"
        testJobB["outcome"] = "failure"
        testJobC["outcome"] = "success"
        testJobA.save()
        testJobB.save()
        testJobC.save()

        testJobA.completeInputFiles()

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        assert compFiles == 0, \
               "Error: test sub has wrong number of complete files: %s" % compFiles

        testJobB["outcome"] = "success"
        testJobB.save()

        testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]])

        availFiles = len(testSubscription.filesOfStatus("Available"))
        assert availFiles == 0, \
               "Error: test sub has wrong number of available files: %s" % availFiles

        acqFiles = len(testSubscription.filesOfStatus("Acquired"))
        assert acqFiles == 1, \
               "Error: test sub has wrong number of acquired files: %s" % acqFiles

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        assert compFiles == 1, \
               "Error: test sub has wrong number of complete files: %s" % compFiles

        failFiles = len(testSubscription.filesOfStatus("Failed"))
        assert failFiles == 1, \
               "Error: test sub has wrong number of failed files: %s" % failFiles

        availFiles = len(bogusSubscription.filesOfStatus("Available"))
        assert availFiles == 0, \
               "Error: test sub has wrong number of available files: %s" % availFiles

        acqFiles = len(bogusSubscription.filesOfStatus("Acquired"))
        assert acqFiles == 3, \
               "Error: test sub has wrong number of acquired files: %s" % acqFiles

        compFiles = len(bogusSubscription.filesOfStatus("Completed"))
        assert compFiles == 0, \
               "Error: test sub has wrong number of complete files: %s" % compFiles

        failFiles = len(bogusSubscription.filesOfStatus("Failed"))
        assert failFiles == 0, \
               "Error: test sub has wrong number of failed files: %s" % failFiles

        return
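Read together, the assertions trace one per-file rule, approximated below in plain Python: when completeInputFiles() fires for a file, the file becomes Completed only if every job reading it succeeded, files named in skipFiles become Failed, and otherwise it stays Acquired (as does any file, like testFileC, whose jobs never call completeInputFiles() at all). This is an inference from the assertions above, not the DAO's actual SQL.

    # Approximate effect of completeInputFiles() on one input file:
    def resolveFileState(outcomes, skipped=False):
        if skipped:
            return "Failed"      # files in skipFiles end up Failed (testFileB)
        if all(outcome == "success" for outcome in outcomes):
            return "Completed"   # every job over the file succeeded (testFileA)
        return "Acquired"        # some reader has not succeeded yet

    assert resolveFileState(["success", "failure"]) == "Acquired"
    assert resolveFileState(["success", "success"]) == "Completed"
    assert resolveFileState(["success"], skipped=True) == "Failed"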