Example #1
    def testCreateDeleteExistsNoFiles(self):
        """
        _testCreateDeleteExistsNoFiles_

        Create and then delete a job but don't add any input files to it.
        Use the job class's exists() method to determine if the job has been
        written to the database before it is created, after it has been created
        and after it has been deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job(name="TestJob")

        assert testJob.exists() == False, "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() is not False, "ERROR: Job does not exist after it was created"

        testJob.delete()

        assert testJob.exists() == False, "ERROR: Job exists after it was deleted"

        return
Example #2
    def test_AutoIncrementCheck(self):
        """
        _AutoIncrementCheck_

        Test and see whether we can find and set the auto_increment values
        """
        myThread = threading.currentThread()
        # The AutoIncrementCheck DAO is MySQL-specific; skip on other backends
        if myThread.dialect.lower() != "mysql":
            return

        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 1)

        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 2)

        incrementDAO.execute(input=10)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 11)

        incrementDAO.execute(input=5)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 12)

        return
Example #3
    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_
        
        Creates a large group of files for testing
        """
        testFileset = Fileset(name = "TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()
            
        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman",
                                name = "wf003", task = "Test")
        testWorkflow.create()

        largeSubscription = Subscription(fileset = testFileset,
                                         workflow = testWorkflow,
                                         split_algo = "FileBased",
                                         type = "Processing")
        largeSubscription.create()

        return largeSubscription
Example #4
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = StoreResultsWorkloadFactory.getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

        factory = StoreResultsWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                           arguments)

        testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        return
Example #5
    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where files exist only in the delete subscription,
        which can happen when cleanup of the other subscriptions is fast

        """
        testWorkflowA = Workflow(spec="specA.xml", owner="Steve", name="wfA", task="Test")
        testWorkflowA.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size=1000, events=100, locations=set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(
            fileset=testFileset, workflow=testWorkflowA, split_algo="SiblingProcessingBased", type="Processing"
        )
        testSubscriptionA.create()

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package="WMCore.WMBS", subscription=testSubscriptionA)

        result = deleteFactoryA(files_per_job=50)
        self.assertEqual(len(result), 1, "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10, "Error: Wrong number of jobs returned.")

        return
Example #6
    def testByRunHarvesting(self):
        """
        _testByRunHarvesting_
        Provided a fileset with a couple of files and 4 different runs, create
        a single job per run and location.
        The multiRun baggage should be false in this case.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True, "Fileset should be open!")

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory()
        self.assertEqual(len(jobGroups), 1, "Should have created 1 job group")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 6, "Should have created 6 jobs")
            for job in jobGroup.jobs:
                baggage = job.getBaggage()
                self.assertFalse(getattr(baggage, "multiRun", False), "It's supposed to be a byRun job")
Example #7
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        # Runs absent from the mask (like run 300 here) are dropped outright;
        # filtering duplicate runs should also complete without raising.
        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
Example #8
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testJobA = Job(name = makeUUID(), files = [])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name = makeUUID(), files = [])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group = testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name = makeUUID(), files = [])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroup)        
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."                

        return
Example #9
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Create a JobGroup and then delete it.  Use the JobGroup's exists()
        method to determine if it exists before it is created, after it is
        created and after it is deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        self.assertFalse(testJobGroup.exists())

        testJobGroup.create()

        self.assertTrue(testJobGroup.exists())

        testJobGroup.delete()

        self.assertFalse(testJobGroup.exists())

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
Example #10
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Create a JobGroup and commit it to the database.  Rollback the database
        transaction and verify that the JobGroup is no longer in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        assert testJobGroup.exists() == False, "ERROR: Job group exists before it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJobGroup.create()

        assert testJobGroup.exists() is not False, "ERROR: Job group does not exist after it was created"

        myThread.transaction.rollback()

        assert testJobGroup.exists() == False, "ERROR: Job group exists after transaction was rolled back."

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
Example #11
    def createSingleJobWorkflow(self):
        """
        Create a workflow with one job and two files, and store the results in instance variables
        """

        self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        self.testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        self.testFileA.addRun(Run(1, *[45]))
        self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        self.testFileB.addRun(Run(1, *[46]))
        self.testFileA.create()
        self.testFileB.create()

        self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB])

        self.testJob.create(group=testJobGroup)
        self.testJob.associateFiles()
Example #12
    def generateFakeMCFile(self, numEvents = 100, firstEvent = 1,
                           lastEvent = 100, firstLumi = 1, lastLumi = 10,
                           index = 1):
        # MC comes with only one MCFakeFile
        singleMCFileset = Fileset(name = "MCTestFileset %i" % index)
        singleMCFileset.create()
        newFile = File("MCFakeFileTest %i" % index, size = 1000,
                       events = numEvents,
                       locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
        newFile["first_event"] = firstEvent
        newFile["last_event"] = lastEvent
        newFile.create()
        singleMCFileset.addFile(newFile)
        singleMCFileset.commit()
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test")
        testWorkflow.create()

        singleMCFileSubscription = Subscription(fileset = singleMCFileset,
                                                workflow = testWorkflow,
                                                split_algo = "EventBased",
                                                type = "Production")
        singleMCFileSubscription.create()
        return singleMCFileSubscription
Example #13
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_
        
        Create a test subscription whose files are spread across the requested sites.
        """

        if nSites > self.nSites:
            nSites = self.nSites

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        # Create a testWorkflow
        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
        testWorkflow.create()

        # Create the files for each site
        for s in range(nSites):
            for i in range(nFiles):
                newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s]))
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing"
        )
        testSubscription.create()

        # Close the fileset
        if closeFileset:
            testFileset.markOpen(isOpen=False)

        return testSubscription
Example #14
    def subscribeWMBS(self, task):
        """
        Create a subscription for each task

        """

        workFlow, fileSet = self.createWorkflow(task)

        workFlow.load()
        fileSet.load()

        subType = ''
        if task.name() == 'Processing' or task.name() == 'Production':
            subType = 'Processing'
        elif task.name() == 'Merge':
            subType = 'Merge'

        newSub = Subscription(fileset=fileSet, workflow=workFlow, split_algo='FileBased', type=subType)
        newSub.create()

        # Add subscription to dictionary
        self.subDict[task.name()] = newSub

        # Add subscription id to task
        setattr(task.data.input.WMBS, 'Subscription', newSub['id'])

        if newSub.exists() is False:
            raise Exception("ERROR: Subscription does not exist after it was created")

        logging.info('Created subscription for task %s', task.name())

        return
Example #16
    def testByRunAndRunBlacklist(self):
        """
        _testByRunAndRunBlacklist_

        Create harvesting jobs by run for the whitelisted runs, skipping any
        run that is also blacklisted
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml", owner="amaltaro",
                                      name="TestWorkflow", task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset, workflow=harvestingWorkflow,
                                  split_algo="Harvest", type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(runWhitelist=[1, 2, 3, 4, 5], runBlacklist=[1, 3])
        self.assertEqual(len(jobGroups), 1, "One jobgroup per location")

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 3)
Example #17
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile, i, lumisPerFile, "somese.cern.ch")
            newFile.create()
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile, i, lumisPerFile, "otherse.cern.ch")
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing"
        )
        testSubscription.create()

        return testSubscription
Example #18
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """
        baseName = makeUUID()

        testWorkflow = Workflow(spec="spec.xml", owner="dmwm",
                                name="testWorkflow_%s" % baseName[:4], task="Test")
        testWorkflow.create()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T1_US_FNAL_Disk')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'T2_CH_CERN')
                testFileset.addFile(newFile)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
Example #19
    def testMultiRunHarvesting(self):
        """
        _testMultiRunHarvesting_

        Provided a fileset with a couple of files and different runs, create a
        single job covering all the runs at a specific location. This also sets
        the multiRun baggage (True) on the job, which is later looked up by
        SetupCMSSWPSet.
        """
        multipleFilesFileset = createCommonFileset()
        self.assertEqual(multipleFilesFileset.open, True)

        harvestingWorkflow = Workflow(spec="spec.xml",
                                      owner="amaltaro",
                                      name="TestWorkflow",
                                      task="Test")
        harvestingWorkflow.create()

        harvestSub = Subscription(fileset=multipleFilesFileset,
                                  workflow=harvestingWorkflow,
                                  split_algo="Harvest",
                                  type="Harvesting")
        harvestSub.create()

        multipleFilesFileset.markOpen(False)
        self.assertEqual(multipleFilesFileset.open, False, "Fileset should now be closed")

        jobFactory = self.splitterFactory(package="WMCore.WMBS", subscription=harvestSub)
        jobGroups = jobFactory(dqmHarvestUnit="multiRun")
        self.assertEqual(len(jobGroups), 1)

        for jobGroup in jobGroups:
            self.assertEqual(len(jobGroup.jobs), 1)
Example #20
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
Example #21
    def testD_NonContinuousLumis(self):
        """
        _NonContinuousLumis_

        Test whether LumiBased splitting works when the lumis are non-continuous
        """

        baseName = makeUUID()
        nFiles = 10

        testFileset = Fileset(name = baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            # Set to two non-continuous lumi numbers
            lumis = [100 + i, 200 + i]
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "LumiBased",
                                        type = "Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = testSubscription)

        jobGroups = jobFactory(lumis_per_job = 2,
                               halt_job_on_file_boundaries = False,
                               splitOnRun = False,
                               performance = self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 10)
        for j in jobs:
            runs = j['mask'].getRunAndLumis()
            for r in runs.keys():
                self.assertEqual(len(runs[r]), 2)
                for l in runs[r]:
                    # Each run should have two lumis
                    # Each lumi should be of form [x, x]
                    # meaning that the first and last lumis are the same
                    self.assertEqual(len(l), 2)
                    self.assertEqual(l[0], l[1])
            self.assertEqual(j['estimatedJobTime'], 100 * 12)
            self.assertEqual(j['estimatedDiskUsage'], 100 * 400)
            self.assertEqual(j['estimatedMemoryUsage'], 2300)

        return
Example #22
    def _createThisSubscription(self, initialCounter=1):
        """
        Private function to create a fileset and subscription with
        different fileset and file names

        :param initialCounter: just a simple integer to be appended to files
        :return: a splitter instance (jobFactory)
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        testFileset = Fileset(name='Fileset%s' % initialCounter)

        fileA = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=800)
        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")

        initialCounter = int(initialCounter) + 1
        fileB = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=400)
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("T1_US_FNAL_Disk")

        initialCounter = int(initialCounter) + 1
        fileC = File(lfn="/this/is/file%s" % initialCounter, size=1000, events=500)
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("T1_US_FNAL_Disk")

        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        return jobFactory
Example #23
    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow;
        every subscription must be unfinished at first
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task='/Repack_Run481516_StreamZ/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW',
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return
Example #24
    def testParentageByJob(self):
        """
        _testParentageByJob_
        
        Tests the DAO that assigns parentage by Job
        """

        testWorkflow = Workflow(spec = 'hello', owner = "mnorman",
                                name = "wf001", task="basicWorkload/Production")
        testWorkflow.create()
        testFileset = Fileset(name = "TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased")
        testSubscription.create()
        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentA.addRun(Run( 1, *[45]))
        testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentB.addRun(Run( 1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                         checksums = {'cksum':1})
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()

        testJobA = Job()
        testJobA.create(group = testJobGroup)
        testJobA.addFile(testFileParentA)
        testJobA.addFile(testFileParentB)
        testJobA.associateFiles()

        parentAction = self.daofactory(classname = "Files.SetParentageByJob")
        parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']})

        testFileB = File(id = testFileA["id"])
        testFileB.loadData(parentage = 1)

        goldenFiles = [testFileParentA, testFileParentB]
        for parentFile in testFileB["parents"]:
            self.assertEqual(parentFile in goldenFiles, True,
                   "ERROR: Unknown parent file")
            goldenFiles.remove(parentFile)

        self.assertEqual(len(goldenFiles), 0,
                         "ERROR: Some parents are missing")
Example #25
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None, wfPrio=1, changeState=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        changeState is an instance of the ChangeState class to make job status changes
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=wfPrio)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        if changeState:
            for group in jobGroupList:
                changeState.propagate(group.jobs, 'created', 'new')

        return jobGroupList
Example #26
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None):
        """
        Creates a series of jobGroups for submissions

        """
        # Avoid mutable default arguments; fall back to fresh lists
        bl = bl or []
        wl = wl or []

        jobGroupList = []

        testWorkflow = Workflow(
            spec=workloadSpec,
            owner="tapas",
            name=makeUUID(),
            task="basicWorkload/Production",
            owner_vogroup="phgroup",
            owner_vorole="cmsrole",
        )
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #27
    def testDeleteTransaction(self):
        """
        _testDeleteTransaction_

        Create a new job and commit it to the database.  Start a new transaction
        and delete the job from the database.  Verify that the job has been
        deleted.  After that, roll back the transaction and verify that the
        job is once again in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])

        assert testJob.exists() is False, \
            "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() is not False, \
            "ERROR: Job does not exist after it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJob.delete()

        assert testJob.exists() is False, \
            "ERROR: Job exists after it was deleted"

        myThread.transaction.rollback()

        assert testJob.exists() is not False, \
            "ERROR: Job does not exist after transaction was rolled back."

        return
Example #28
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Stuff WMBS with workflows
        """
        workflow = Workflow(spec = 'spec.xml', name = 'ReRecoTest_v0Emulator',
                            task = '/ReRecoTest_v0Emulator/Test', priority = 10)
        workflow.create()
        inputFileset = Fileset(name = 'TestFileset')
        inputFileset.create()
        subscription = Subscription(inputFileset, workflow)
        subscription.create()
Example #29
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        testFileset.create()
        parentFile = File('%s_parent' % (baseName), size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100, locations = "otherse.cern.ch")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = self.testWorkflow,
                                        split_algo = "LumiBased",
                                        type = "Processing")
        testSubscription.create()

        return testSubscription
Example #30
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """


        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = "Processing",
                                        split_algo = "FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create the jobs
        for i in range(nJobs):
            testJob = Job(name = '%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
Example #31
class SiblingProcessingBasedTest(unittest.TestCase):
    """
    _SiblingProcessingBasedTest_

    Test SiblingProcessing job splitting.
    """
    def setUp(self):
        """
        _setUp_

        Setup the database connections and schema.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute("site1", seName = "somese.cern.ch")
        locationAction.execute("site2", seName = "somese2.cern.ch")

        self.testFilesetA = Fileset(name = "FilesetA")
        self.testFilesetA.create()
        self.testFilesetB = Fileset(name = "FilesetB")
        self.testFilesetB.create()

        self.testFileA = File("testFileA", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileA.create()
        self.testFileB = File("testFileB", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileB.create()
        self.testFileC = File("testFileC", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileC.create()

        self.testFilesetA.addFile(self.testFileA)
        self.testFilesetA.addFile(self.testFileB)
        self.testFilesetA.addFile(self.testFileC)
        self.testFilesetA.commit()

        self.testFileD = File("testFileD", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileD.create()
        self.testFileE = File("testFileE", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileE.create()
        self.testFileF = File("testFileF", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileF.create()

        self.testFilesetB.addFile(self.testFileD)
        self.testFilesetB.addFile(self.testFileE)
        self.testFilesetB.addFile(self.testFileF)
        self.testFilesetB.commit()

        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve",
                                 name = "wfB", task = "Test")
        testWorkflowB.create()
        testWorkflowC = Workflow(spec = "specC.xml", owner = "Steve",
                                 name = "wfC", task = "Test")
        testWorkflowC.create()
        testWorkflowD = Workflow(spec = "specD.xml", owner = "Steve",
                                 name = "wfD", task = "Test")
        testWorkflowD.create()

        self.testSubscriptionA = Subscription(fileset = self.testFilesetA,
                                              workflow = testWorkflowA,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionA.create()
        self.testSubscriptionB = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowB,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionB.create()
        self.testSubscriptionC = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowC,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionC.create()
        self.testSubscriptionD = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowD,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionD.create()

        deleteWorkflow = Workflow(spec = "specE.xml", owner = "Steve",
                                  name = "wfE", task = "Test")
        deleteWorkflow.create()

        self.deleteSubscriptionA = Subscription(fileset = self.testFilesetA,
                                                workflow = deleteWorkflow,
                                                split_algo = "SiblingProcessingBased",
                                                type = "Cleanup")
        self.deleteSubscriptionA.create()
        self.deleteSubscriptionB = Subscription(fileset = self.testFilesetB,
                                                workflow = deleteWorkflow,
                                                split_algo = "SiblingProcessingBased",
                                                type = "Cleanup")
        self.deleteSubscriptionB.create()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def testSiblingProcessing(self):
        """
        _testSiblingProcessing_

        Verify that the sibling processing split works correctly dealing with
        failed files and acquiring files correctly.
        """
        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionA)
        deleteFactoryB = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionB)

        result = deleteFactoryA()

        assert len(result) == 0, \
               "Error: No jobs should be returned."

        result = deleteFactoryB()

        assert len(result) == 0, \
               "Error: No jobs should be returned."

        self.testSubscriptionA.completeFiles(self.testFileA)

        result = deleteFactoryA(files_per_job = 1)

        assert len(result) == 1, \
               "Error: Only one jobgroup should be returned."
        assert len(result[0].jobs) == 1, \
               "Error: There should only be one job in the jobgroup."
        assert len(result[0].jobs[0]["input_files"]) == 1, \
               "Error: Job should only have one input file."
        assert result[0].jobs[0]["input_files"][0]["lfn"] == "testFileA", \
               "Error: Input file for job is wrong."
        assert list(result[0].jobs[0]["input_files"][0]["locations"]) == ["somese.cern.ch"], \
               "Error: File location is wrong."

        result = deleteFactoryB(files_per_job = 1)

        assert len(result) == 0, \
               "Error: Second subscription should have no jobs."

        result = deleteFactoryA(files_per_job = 1)

        assert len(result) == 0, \
               "Error: No jobs should have been created."

        self.testSubscriptionB.completeFiles(self.testFileD)
        self.testSubscriptionC.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job = 1)

        assert len(result) == 0, \
               "Error: No jobs should have been created."

        result = deleteFactoryB(files_per_job = 1)

        assert len(result) == 0, \
               "Error: No jobs should have been created."

        self.testSubscriptionD.failFiles(self.testFileD)

        result = deleteFactoryA(files_per_job = 1)

        assert len(result) == 0, \
               "Error: No jobs should have been created."

        result = deleteFactoryB(files_per_job = 1)

        assert len(result) == 0, \
               "Error: No job groups should have been created."

        self.testSubscriptionB.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionC.completeFiles([self.testFileE, self.testFileF])
        self.testSubscriptionD.completeFiles([self.testFileE, self.testFileF])

        result = deleteFactoryB(files_per_job = 10)

        assert len(result) == 0, \
               "Error: No jobs should have been created."

        self.testFilesetB.markOpen(False)

        result = deleteFactoryB(files_per_job = 10)

        assert len(result) == 1, \
               "Error: One jobgroup should have been returned."
        assert len(result[0].jobs) == 1, \
               "Error: There should only be one job in the jobgroup."
        assert len(result[0].jobs[0]["input_files"]) == 2, \
               "Error: Job should only have one input file."

        lfns = [result[0].jobs[0]["input_files"][0]["lfn"], result[0].jobs[0]["input_files"][1]["lfn"]]

        assert "testFileE" in lfns, \
               "Error: TestFileE missing from job input."
        assert "testFileF" in lfns, \
               "Error: TestFileF missing from job input."

        self.assertEqual(len(self.deleteSubscriptionB.availableFiles()), 0,
                         "Error: There should be no available files.")

        completeFiles = self.deleteSubscriptionB.filesOfStatus("Completed")
        self.assertEqual(len(completeFiles), 1,
                         "Error: There should only be one complete file.")
        self.assertEqual(list(completeFiles)[0]["lfn"], "testFileD",
                         "Error: Test file D should be complete.")

        return

    def testMultipleLocations(self):
        """
        _testMultipleLocations_

        Verify that the sibling processing based algorithm doesn't create jobs
        that run over files at multiple sites.
        """
        testFile1 = File("testFile1", size = 1000, events = 100,
                         locations = set(["somese2.cern.ch"]))
        testFile1.create()
        testFile2 = File("testFile2", size = 1000, events = 100,
                         locations = set(["somese2.cern.ch"]))
        testFile2.create()
        testFile3 = File("testFile3", size = 1000, events = 100,
                         locations = set(["somese2.cern.ch"]))
        testFile3.create()

        self.testFilesetA.addFile(testFile1)
        self.testFilesetA.addFile(testFile2)
        self.testFilesetA.addFile(testFile3)
        self.testFilesetA.commit()
        self.testFilesetA.markOpen(False)

        self.testSubscriptionA.completeFiles([testFile1, testFile2, testFile3])
        self.testSubscriptionA.completeFiles([self.testFileA, self.testFileB, self.testFileC])

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = self.deleteSubscriptionA)

        result = deleteFactoryA(files_per_job = 50)

        assert len(result) == 2, \
               "Error: Wrong number of jobgroups returned."

        goldenFilesA = ["testFileA", "testFileB", "testFileC"]
        goldenFilesB = ["testFile1", "testFile2", "testFile3"]

        locations = {"testFileA": "somese.cern.ch", "testFileB": "somese.cern.ch",
                     "testFileC": "somese.cern.ch", "testFile1": "somese2.cern.ch",
                     "testFile2": "somese2.cern.ch", "testFile3": "somese2.cern.ch"}

        for jobGroup in result:
            assert len(jobGroup.jobs) == 1, \
                   "Error: Wrong number of jobs in jobgroup."
            assert len(jobGroup.jobs[0]["input_files"]) == 3, \
                   "Error: Wrong number of input files in job."

            jobSite = list(jobGroup.jobs[0]["input_files"][0]["locations"])[0]

            if jobSite == "somese.cern.ch":
                goldenFiles = goldenFilesA
            else:
                goldenFiles = goldenFilesB

            for jobFile in jobGroup.jobs[0]["input_files"]:
                assert list(jobFile["locations"])[0] == locations[jobFile["lfn"]], \
                       "Error: Wrong site for file."
                goldenFiles.remove(jobFile["lfn"])

            assert len(goldenFiles) == 0,  \
                   "Error: Files are missing."

        return

    def testLargeNumberOfFiles(self):
        """
        _testLargeNumberOfFiles_

        Setup a subscription with 500 files and verify that the splitting algo
        works correctly.
        """
        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve",
                                 name = "wfB", task = "Test")
        testWorkflowB.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size = 1000, events = 100,
                            locations = set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(fileset = testFileset,
                                         workflow = testWorkflowA,
                                         split_algo = "FileBased",
                                         type = "Processing")
        testSubscriptionA.create()
        testSubscriptionB = Subscription(fileset = testFileset,
                                         workflow = testWorkflowB,
                                         split_algo = "SiblingProcessingBased",
                                         type = "Processing")
        testSubscriptionB.create()

        testSubscriptionA.completeFiles(allFiles)

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionB)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return

    def testFilesWithoutOtherSubscriptions(self):
        """
        _testFilesWithoutOtherSubscriptions_

        Test the case where files exist only in the delete subscription,
        which can happen if cleanup of the other subscriptions is fast

        """
        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        allFiles = []
        for i in range(500):
            testFile = File(str(i), size = 1000, events = 100,
                            locations = set(["somese.cern.ch"]))
            testFile.create()
            allFiles.append(testFile)
            testFileset.addFile(testFile)
        testFileset.commit()

        testSubscriptionA = Subscription(fileset = testFileset,
                                         workflow = testWorkflowA,
                                         split_algo = "SiblingProcessingBased",
                                         type = "Processing")
        testSubscriptionA.create()

        splitter = SplitterFactory()
        deleteFactoryA = splitter(package = "WMCore.WMBS",
                                  subscription = testSubscriptionA)

        result = deleteFactoryA(files_per_job = 50)
        self.assertEqual(len(result), 1,
                         "Error: Wrong number of job groups returned.")
        self.assertEqual(len(result[0].jobs), 10,
                         "Error: Wrong number of jobs returned.")

        return
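The sibling-processing tests above all drive one pattern: a Cleanup subscription using the SiblingProcessingBased splitter releases a file only once every sibling subscription on the same fileset has completed or failed it, and the final files are held back until the fileset is closed. Below is a minimal sketch of that driver, reusing the WMBS calls from these examples; the import path and the fileset.getFiles() accessor are assumptions, and a configured WMBS database is required for it to run.

from WMCore.JobSplitting.SplitterFactory import SplitterFactory

def runCleanupSplit(fileset, processingSubs, cleanupSub, filesPerJob=1):
    """
    Complete every file in each processing subscription, close the
    fileset, then run the cleanup subscription's SiblingProcessingBased
    splitter.  Returns the job groups that were created.
    """
    allFiles = fileset.getFiles()
    for sub in processingSubs:
        sub.completeFiles(allFiles)
    fileset.markOpen(False)

    splitter = SplitterFactory()
    cleanupFactory = splitter(package="WMCore.WMBS", subscription=cleanupSub)
    return cleanupFactory(files_per_job=filesPerJob)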
Ejemplo n.º 32
0
    def testPersist(self):
        """
        _testPersist_

        Test the persist method of the ChangeState module.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(4):
            newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"]))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 4, \
            "Error: Splitting should have created four jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobC = jobGroup.jobs[2]
        testJobC["user"] = "******"
        testJobC["group"] = "DMWM"
        testJobC["taskType"] = "Processing"
        testJobD = jobGroup.jobs[3]
        testJobD["user"] = "******"
        testJobD["group"] = "DMWM"
        testJobD["taskType"] = "Processing"

        change.persist([testJobA, testJobB], "created", "new")
        change.persist([testJobC, testJobD], "new", "none")

        stateDAO = self.daoFactory(classname="Jobs.GetState")

        jobAState = stateDAO.execute(id=testJobA["id"])
        jobBState = stateDAO.execute(id=testJobB["id"])
        jobCState = stateDAO.execute(id=testJobC["id"])
        jobDState = stateDAO.execute(id=testJobD["id"])

        assert jobAState == "created" and jobBState == "created" and \
               jobCState == "new" and jobDState == "new", \
            "Error: Jobs didn't change state correctly."

        return
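persist() records the state transition straight in WMBS, which is why the test verifies it through the Jobs.GetState DAO rather than through couch. A small helper condensing that round trip; persistAndCheck is our name, and it assumes a ChangeState instance and a DAO factory like the ones used above.

def persistAndCheck(change, daoFactory, jobs, newState, oldState):
    """
    Record a state transition with persist() and read it back through
    the Jobs.GetState DAO, as exercised in the test above.
    """
    change.persist(jobs, newState, oldState)
    stateDAO = daoFactory(classname="Jobs.GetState")
    for job in jobs:
        assert stateDAO.execute(id=job["id"]) == newState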
Ejemplo n.º 33
0
    def setUp(self):
        """
        _setUp_

        Setup the database connections and schema.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute("site1", seName = "somese.cern.ch")
        locationAction.execute("site2", seName = "somese2.cern.ch")

        self.testFilesetA = Fileset(name = "FilesetA")
        self.testFilesetA.create()
        self.testFilesetB = Fileset(name = "FilesetB")
        self.testFilesetB.create()

        self.testFileA = File("testFileA", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileA.create()
        self.testFileB = File("testFileB", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileB.create()
        self.testFileC = File("testFileC", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileC.create()

        self.testFilesetA.addFile(self.testFileA)
        self.testFilesetA.addFile(self.testFileB)
        self.testFilesetA.addFile(self.testFileC)
        self.testFilesetA.commit()

        self.testFileD = File("testFileD", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileD.create()
        self.testFileE = File("testFileE", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileE.create()
        self.testFileF = File("testFileF", size = 1000, events = 100,
                              locations = set(["somese.cern.ch"]))
        self.testFileF.create()

        self.testFilesetB.addFile(self.testFileD)
        self.testFilesetB.addFile(self.testFileE)
        self.testFilesetB.addFile(self.testFileF)
        self.testFilesetB.commit()

        testWorkflowA = Workflow(spec = "specA.xml", owner = "Steve",
                                 name = "wfA", task = "Test")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec = "specB.xml", owner = "Steve",
                                 name = "wfB", task = "Test")
        testWorkflowB.create()
        testWorkflowC = Workflow(spec = "specC.xml", owner = "Steve",
                                 name = "wfC", task = "Test")
        testWorkflowC.create()
        testWorkflowD = Workflow(spec = "specD.xml", owner = "Steve",
                                 name = "wfD", task = "Test")
        testWorkflowD.create()

        self.testSubscriptionA = Subscription(fileset = self.testFilesetA,
                                              workflow = testWorkflowA,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionA.create()
        self.testSubscriptionB = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowB,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionB.create()
        self.testSubscriptionC = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowC,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionC.create()
        self.testSubscriptionD = Subscription(fileset = self.testFilesetB,
                                              workflow = testWorkflowD,
                                              split_algo = "FileBased",
                                              type = "Processing")
        self.testSubscriptionD.create()

        deleteWorkflow = Workflow(spec = "specE.xml", owner = "Steve",
                                  name = "wfE", task = "Test")
        deleteWorkflow.create()

        self.deleteSubscriptionA = Subscription(fileset = self.testFilesetA,
                                                workflow = deleteWorkflow,
                                                split_algo = "SiblingProcessingBased",
                                                type = "Cleanup")
        self.deleteSubscriptionA.create()
        self.deleteSubscriptionB = Subscription(fileset = self.testFilesetB,
                                                workflow = deleteWorkflow,
                                                split_algo = "SiblingProcessingBased",
                                                type = "Cleanup")
        self.deleteSubscriptionB.create()
        return
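Nearly every setUp in these examples repeats the same fixture: create several files at a single location, add them to a fileset, and commit. A small helper condensing it; makeFilesAt is our name, and it reuses only the File and Fileset calls shown above.

def makeFilesAt(fileset, lfns, location="somese.cern.ch"):
    """Create files at one location, attach them to the fileset, commit."""
    files = []
    for lfn in lfns:
        newFile = File(lfn, size=1000, events=100, locations=set([location]))
        newFile.create()
        fileset.addFile(newFile)
        files.append(newFile)
    fileset.commit()
    return files

# e.g. makeFilesAt(self.testFilesetA, ["testFileA", "testFileB", "testFileC"])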
Ejemplo n.º 34
0
    def testUpdateLocation(self):
        """
        _testUpdateLocation_

        Check that we can update the location of a job through
        the state machine.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")
        locationAction.execute("site2", pnn="T1_US_FNAL_Disk")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        testFileA = File(lfn="SomeLFNA",
                         events=1024,
                         size=2048,
                         locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"]))
        testFileB = File(lfn="SomeLFNB",
                         events=1025,
                         size=2049,
                         locations=set(["T2_CH_CERN", "T1_US_FNAL_Disk"]))
        testFileA.create()
        testFileB.create()

        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 2, \
            "Error: Splitting should have created two jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Merge"
        testJobA["site_cms_name"] = "site1"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobB["site_cms_name"] = "site2"

        change.propagate([testJobA, testJobB], "new", "none")
        change.propagate([testJobA, testJobB], "created", "new")
        change.propagate([testJobA, testJobB], "executing", "created")

        testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

        maxKey = max(testJobADoc["states"].keys())
        transition = testJobADoc["states"][maxKey]
        self.assertEqual(transition["location"], "site1")

        testJobBDoc = change.jobsdatabase.document(testJobB["couch_record"])

        maxKey = max(testJobBDoc["states"].keys())
        transition = testJobBDoc["states"][maxKey]
        self.assertEqual(transition["location"], "site2")

        jobs = [{'jobid': 1, 'location': 'site2'}]

        change.recordLocationChange(jobs)

        testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

        maxKey = max(testJobADoc["states"].keys())
        transition = testJobADoc["states"][maxKey]
        self.assertEqual(transition["location"], "site2")

        listJobsDAO = self.daoFactory(classname="Jobs.GetLocation")
        jobid = [{'jobid': 1}, {'jobid': 2}]
        jobsLocation = listJobsDAO.execute(jobid)
        for job in jobsLocation:
            self.assertEqual(job['site_name'], 'site2')

        return
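Both couch checks above read the most recent transition by taking the largest key of the job document's states map. A small hypothetical helper for that lookup; latestTransition is our name, and comparing keys as integers guards against couch handing the keys back as strings.

def latestTransition(jobDoc):
    """
    Return the most recent state-transition record from a couch job
    document, comparing state keys numerically.
    """
    maxKey = max(jobDoc["states"], key=int)
    return jobDoc["states"][maxKey]

# e.g. latestTransition(change.jobsdatabase.document(job["couch_record"]))["location"]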
Ejemplo n.º 35
0
    def testRetryCount(self):
        """
        _testRetryCount_

        Verify that the retry count is incremented when we move out of the
        submitcooloff or jobcooloff state.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(4):
            newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"]))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 4, \
            "Error: Splitting should have created four jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobC = jobGroup.jobs[2]
        testJobC["user"] = "******"
        testJobC["group"] = "DMWM"
        testJobC["taskType"] = "Processing"
        testJobD = jobGroup.jobs[3]
        testJobD["user"] = "******"
        testJobD["group"] = "DMWM"
        testJobD["taskType"] = "Processing"

        change.persist([testJobA], "created", "submitcooloff")
        change.persist([testJobB], "created", "jobcooloff")
        change.persist([testJobC, testJobD], "new", "none")

        testJobA.load()
        testJobB.load()
        testJobC.load()
        testJobD.load()

        assert testJobA["retry_count"] == 1, \
            "Error: Retry count is wrong."
        assert testJobB["retry_count"] == 1, \
            "Error: Retry count is wrong."
        assert testJobC["retry_count"] == 0, \
            "Error: Retry count is wrong."
        assert testJobD["retry_count"] == 0, \
            "Error: Retry count is wrong."

        return
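The rule under test is narrow: a job's retry count is incremented only when it leaves one of the cooloff states. A toy, database-free model of just that rule:

COOLOFF_STATES = {"submitcooloff", "jobcooloff"}

def nextRetryCount(retryCount, oldState):
    """Toy model: leaving a cooloff state bumps the retry count."""
    return retryCount + 1 if oldState in COOLOFF_STATES else retryCount

assert nextRetryCount(0, "submitcooloff") == 1
assert nextRetryCount(0, "jobcooloff") == 1
assert nextRetryCount(0, "none") == 0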
Ejemplo n.º 36
0
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This tests run on unmerged RECO output
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({
            "SkimName1": "SomeSkim",
            "SkimInput1": "RECOoutput",
            "Skim1ConfigCacheID": skimConfig
        })
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", dataProcArguments)

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                         Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "DataProcessing",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            fset = goldenOutputMod + tier
            mergedOutput = skimWorkflow.outputMap[fset][0][
                "merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
                % (goldenOutputMod, tier),
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod, tier in viewitems(goldenOutputMods):
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
                % (goldenOutputMod, tier),
                "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s"
                % (goldenOutputMod, tier),
                "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput, tier in viewitems(goldenOutputMods):
            fset = skimOutput + tier
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" %
                fset)
            unmerged.loadData()
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
                skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged,
                                             workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"],
                             "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput, tier in viewitems(goldenOutputMods):
            fset = skimOutput + tier
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" %
                fset)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s"
                % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in goldenOutputMods:
            skimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect"
                % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                         workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
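The assertions above repeat one naming convention: unmerged output lands in "<taskPath>/unmerged-<module><tier>" and merged output in "<taskPath>/<mergeTask>/merged-Merged<tier>". Two tiny helpers capture it; the names are ours, derived only from the strings asserted in this test.

def unmergedFilesetName(taskPath, module, tier):
    """Unmerged output fileset path, per the convention asserted above."""
    return "%s/unmerged-%s%s" % (taskPath, module, tier)

def mergedFilesetName(taskPath, mergeTask, tier):
    """Merged output fileset path, per the convention asserted above."""
    return "%s/%s/merged-Merged%s" % (taskPath, mergeTask, tier)

assert unmergedFilesetName("/TestWorkload/DataProcessing/SomeSkim", "SkimB", "USER") == \
       "/TestWorkload/DataProcessing/SomeSkim/unmerged-SkimBUSER"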
Ejemplo n.º 37
0
class FixedDelayTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", pnn = "T2_CH_CERN")

        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(i, *[45+i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        self.multipleFileLumiset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(1, *[45 + i // 3]))
            newFile.create()
            self.multipleFileLumiset.addFile(newFile)
        self.multipleFileLumiset.commit()

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        self.singleLumiFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleLumiFileset.addFile(newFile)
        self.singleLumiFileset.commit()


        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test")
        testWorkflow.create()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleLumiSubscription.create()
        self.singleLumiSubscription.create()
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.clearDatabase()
        return

    def testNone(self):
        """
        _testNone_

        Since the subscriptions are open, we shouldn't get any jobs back
        """
        splitter = SplitterFactory()
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time = int(time.time())*2)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time = int(time.time())*2)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time = int(time.time())*2)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time = int(time.time())*2)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        return

    def testClosed(self):
        """
        _testClosed_

        Since the subscriptions are closed and none of the files have been
        acquired, all of the files should show up
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."

        job = jobGroups[0].jobs.pop()

        assert job.getFiles(type = "lfn") == ["/some/file/name"], \
               "ERROR: Job contains unknown files."

        self.multipleFileSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs),1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)

        self.multipleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs),1)
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)

        self.singleLumiSubscription.getFileset().markOpen(False)
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        assert len(jobGroups) == 1, \
               "ERROR: JobFactory didn't return one JobGroup."

        assert len(jobGroups[0].jobs) == 1, \
               "ERROR: JobFactory didn't create a single job."
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 10)


    def testAllAcquired(self):
        """
        _testAllAcquired_

        Since all of the files have been acquired, no job groups should be
        returned.
        """
        splitter = SplitterFactory()
        self.singleFileSubscription.acquireFiles(
                           self.singleFileSubscription.availableFiles())
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        self.multipleFileSubscription.acquireFiles(
                           self.multipleFileSubscription.availableFiles())
        jobFactory = splitter(self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        self.multipleLumiSubscription.acquireFiles(
                           self.multipleLumiSubscription.availableFiles())
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

        self.singleLumiSubscription.acquireFiles(
                           self.singleLumiSubscription.availableFiles())
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")

    def testClosedSomeAcquired(self):
        """
        _testClosedSomeAcquired_

        Since the subscriptions are closed and some of the files have been
        acquired, only the remaining unacquired files should show up.
        """
        splitter = SplitterFactory()
        self.multipleFileSubscription.getFileset().markOpen(False)

        self.singleFileSubscription.acquireFiles(
                           [self.singleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(jobGroups, [], "Should have returned a null set")



        self.multipleFileSubscription.getFileset().markOpen(False)
        self.multipleFileSubscription.acquireFiles(
                           [self.multipleFileSubscription.availableFiles().pop()])
        jobFactory = splitter(package = "WMCore.WMBS", subscription = self.multipleFileSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.multipleLumiSubscription.getFileset().markOpen(False)
        self.multipleLumiSubscription.acquireFiles(
                           [self.multipleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.multipleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

        self.singleLumiSubscription.getFileset().markOpen(False)
        self.singleLumiSubscription.acquireFiles(
                           [self.singleLumiSubscription.availableFiles().pop()])
        jobFactory = splitter(self.singleLumiSubscription)
        jobGroups = jobFactory(trigger_time = 1)
        self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup")
        self.assertEqual(len(jobGroups[0].jobs), 1, \
               "JobFactory should have made one job")
        myfiles = jobGroups[0].jobs[0].getFiles()
        self.assertEqual(len(myfiles), 9, \
                "JobFactory should have provides us with 9 files")

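Two conditions gate the FixedDelay splitter in all four tests: the fileset must be closed and trigger_time must already have passed; only then do the available files go into a single job. A condensed sketch of that gating, assuming the subscriptions from setUp and the time import these tests already use:

splitter = SplitterFactory()
sub = self.multipleFileSubscription

# Open fileset and a trigger_time in the future: no job groups (cf. testNone).
jobFactory = splitter(sub)
assert jobFactory(trigger_time=int(time.time()) * 2) == []

# Closed fileset and a past trigger_time: one job holding every file (cf. testClosed).
sub.getFileset().markOpen(False)
jobFactory = splitter(sub)
jobGroups = jobFactory(trigger_time=1)
assert len(jobGroups) == 1 and len(jobGroups[0].jobs) == 1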
Ejemplo n.º 38
0
    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        _createSubscriptionsInWMBS_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        # FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        subscription.create()

        ### FIXME: I'm pretty sure we can improve how we handle this site white/black list
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        if self.topLevelSubscription is None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription %s created for %s",
                         subscription["id"], self.wmSpec.name())
        else:
            logging.info("Child subscription %s created for %s",
                         subscription["id"], self.wmSpec.name())

        outputModules = task.getOutputModulesForTask()
        ignoredOutputModules = task.getIgnoredOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                if outputModuleName in ignoredOutputModules:
                    msg = "%s has %s as IgnoredOutputModule, skipping fileset creation."
                    logging.info(msg, task.getPathName(), outputModuleName)
                    continue
                dataTier = getattr(getattr(outputModule, outputModuleName),
                                   "dataTier", '')
                filesetName = self.outputFilesetName(task, outputModuleName,
                                                     dataTier)
                outputFileset = Fileset(filesetName)
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        childDatatier = getattr(childTask.data.input,
                                                'dataTier', '')
                        if childTask.taskType() in ["Cleanup", "Merge"] and \
                                childDatatier != dataTier:
                            continue
                        elif childTask.taskType() == "Merge" and \
                                childDatatier == dataTier:
                            filesetName = self.outputFilesetName(
                                childTask, "Merged", dataTier)
                            mergedOutputFileset = Fileset(filesetName)
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset is not None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        self._createSubscriptionsInWMBS(
                            childTask, outputFileset, alternativeFilesetClose)

                if mergedOutputFileset is None:
                    workflow.addOutput(outputModuleName + dataTier,
                                       outputFileset, outputFileset)
                else:
                    workflow.addOutput(outputModuleName + dataTier,
                                       outputFileset, mergedOutputFileset)

        return
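The recursion above is easier to see stripped of WMBS: subscribe the task to its input fileset, create one output fileset per output module, and recurse into the child tasks that consume that module. A toy, database-free model follows, with entirely hypothetical names and data.

def walkTasks(task, inputFileset, subscriptions):
    """
    Toy model of _createSubscriptionsInWMBS: record a subscription of
    the task to its input fileset, then recurse into child tasks using
    a per-output-module fileset as their input.
    """
    subscriptions.append((task["name"], inputFileset))
    for module, children in task.get("outputs", {}).items():
        outputFileset = "%s/unmerged-%s" % (task["name"], module)
        for child in children:
            walkTasks(child, outputFileset, subscriptions)

tree = {"name": "DataProcessing",
        "outputs": {"RECOoutput": [{"name": "RECOMerge", "outputs": {}}]}}
subs = []
walkTasks(tree, "top-level-fileset", subs)
# subs == [("DataProcessing", "top-level-fileset"),
#          ("RECOMerge", "DataProcessing/unmerged-RECOoutput")]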
Ejemplo n.º 39
0
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

        factory = MonteCarloFromGENWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", arguments)

        outputDatasets = testWorkload.listOutputDatasets()
        self.assertEqual(len(outputDatasets), 2)
        self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-v1/RECO" in
                        outputDatasets)
        self.assertTrue(
            "/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-v1/ALCARECO" in
            outputDatasets)

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "MonteCarloFromGEN",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'production')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/MonteCarloFromGEN/unmerged-%s" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s"
                % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(
            name="TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(
            procSubscription["type"], "Production",
            "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO"
        )
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedReco,
                                         workflow=recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(
            name=
            "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO"
        )
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlca,
                                         workflow=alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(
                name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" %
                procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s"
                % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive"
        )
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect"
        )
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive"
        )
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect"
        )
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
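Across this test and the ReReco example above, the subscription type pins down the expected splitting algorithm. A small reference mapping of just the pairs asserted in these examples; it is a summary of these tests, not an exhaustive WMCore table.

# Subscription type -> split algorithm, as asserted in these tests.
EXPECTED_SPLIT_ALGOS = {
    "Skim": "FileBased",
    "Merge": "ParentlessMergeBySize",
    "Cleanup": "SiblingProcessingBased",
    "LogCollect": "MinFileBased",
    "Production": "EventAwareLumiBased",  # top-level MonteCarloFromGEN task
}

def checkSplitAlgo(subscription):
    """Assert a loaded subscription uses the algorithm expected for its type."""
    expected = EXPECTED_SPLIT_ALGOS.get(subscription["type"])
    assert expected is None or subscription["split_algo"] == expected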
Ejemplo n.º 40
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="somese.cern.ch")
        locationAction.execute(siteName="site2", seName="otherse.cern.ch")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["otherse.cern.ch",
                                          "somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleSiteSubscription.create()

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }
        return
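
A note on performanceParams: in the splitting tests that use this setUp, estimatedJobTime and estimatedDiskUsage scale with the total number of events in a job (100 events per file here), while estimatedMemoryUsage is the flat memoryRequirement. A standalone sketch of that arithmetic (plain Python, no WMBS needed):

params = {'timePerEvent': 12, 'memoryRequirement': 2300, 'sizePerEvent': 400}

def estimates(params, events):
    # Mirrors the assertions in the FileBased tests below.
    return {'estimatedJobTime': params['timePerEvent'] * events,
            'estimatedDiskUsage': params['sizePerEvent'] * events,
            'estimatedMemoryUsage': params['memoryRequirement']}

# Two 100-event files per job, as in test2FileSplit:
assert estimates(params, 200) == {'estimatedJobTime': 2400,
                                  'estimatedDiskUsage': 80000,
                                  'estimatedMemoryUsage': 2300}
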
Ejemplo n.º 41
0
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
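
The mask behaviour asserted above can be reproduced without a database: lumis outside the mask are dropped, and a run with no surviving lumis disappears from the result. A pure-Python sketch (the dict layout mirrors what getRunAndLumis() returns, {run: [[firstLumi, lastLumi], ...]}):

mask = {100: [[101, 102]], 200: [[201, 202]]}

def filter_run_lumis(mask, runs):
    """runs: iterable of (run_number, [lumis]); keep only masked lumis."""
    kept = []
    for run, lumis in runs:
        ranges = mask.get(run, [])
        surviving = [l for l in lumis
                     if any(lo <= l <= hi for lo, hi in ranges)]
        if surviving:
            kept.append((run, surviving))
    return kept

print(filter_run_lumis(mask, [(100, [101, 102, 103, 104]),
                              (300, [1001, 1002])]))
# -> [(100, [101, 102])], matching the assertions above
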
Ejemplo n.º 42
0
class FileBasedTest(unittest.TestCase):
    """
    _FileBasedTest_

    Test file based job splitting.
    """
    def setUp(self):
        """
        _setUp_

        Create three subscriptions: one that contains a single file, one that
        contains multiple files, and one with files at multiple sites.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="somese.cern.ch")
        locationAction.execute(siteName="site2", seName="otherse.cern.ch")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/',
                          size=1000,
                          events=100,
                          locations=set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["otherse.cern.ch",
                                          "somese.cern.ch"]))
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.multipleSiteSubscription.create()

        self.performanceParams = {
            'timePerEvent': 12,
            'memoryRequirement': 2300,
            'sizePerEvent': 400
        }
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_

        Creates a large group of files for testing
        """
        testFileset = Fileset(name="TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="mnorman",
                                name="wf003",
                                task="Test")
        testWorkflow.create()

        largeSubscription = Subscription(fileset=testFileset,
                                         workflow=testWorkflow,
                                         split_algo="FileBased",
                                         type="Processing")
        largeSubscription.create()

        return largeSubscription

    def testExactFiles(self):
        """
        _testExactFiles_

        Test file based job splitting when the number of files per job is
        exactly the same as the number of files in the input fileset.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.singleFileSubscription)

        jobGroups = jobFactory(files_per_job=1,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"])
        self.assertEqual(job["estimatedMemoryUsage"], 2300)
        self.assertEqual(job["estimatedDiskUsage"], 400 * 100)
        self.assertEqual(job["estimatedJobTime"], 12 * 100)

        return

    def testMoreFiles(self):
        """
        _testMoreFiles_

        Test file based job splitting when the number of files per job is
        greater than the number of files in the input fileset.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.singleFileSubscription)

        jobGroups = jobFactory(files_per_job=10,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        job = jobGroups[0].jobs.pop()
        self.assertEqual(job.getFiles(type="lfn"), ["/some/file/name"])
        self.assertEqual(job["estimatedMemoryUsage"], 2300)
        self.assertEqual(job["estimatedDiskUsage"], 400 * 100)
        self.assertEqual(job["estimatedJobTime"], 12 * 100)

        return

    def test2FileSplit(self):
        """
        _test2FileSplit_

        Test file based job splitting when the number of files per job is
        2; this should result in five jobs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=2,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 5)

        fileList = []
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job.getFiles()), 2)
            for file in job.getFiles(type="lfn"):
                fileList.append(file)
            self.assertEqual(job["estimatedMemoryUsage"], 2300)
            self.assertEqual(job["estimatedDiskUsage"], 400 * 100 * 2)
            self.assertEqual(job["estimatedJobTime"], 12 * 100 * 2)

        self.assertEqual(len(fileList), 10)

        return

    def test3FileSplit(self):
        """
        _test3FileSplit_

        Test file based job splitting when the number of files per job is
        3; this should result in four jobs.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=3,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 4)

        fileList = []
        for job in jobGroups[0].jobs:
            assert len(job.getFiles()) in [3, 1], \
                "ERROR: Job contains incorrect number of files"
            for file in job.getFiles(type="lfn"):
                fileList.append(file)
            if len(job.getFiles()) == 3:
                self.assertEqual(job["estimatedMemoryUsage"], 2300)
                self.assertEqual(job["estimatedDiskUsage"], 400 * 100 * 3)
                self.assertEqual(job["estimatedJobTime"], 12 * 100 * 3)
            elif len(job.getFiles()) == 1:
                self.assertEqual(job["estimatedMemoryUsage"], 2300)
                self.assertEqual(job["estimatedDiskUsage"], 400 * 100)
                self.assertEqual(job["estimatedJobTime"], 12 * 100)
            else:
                self.fail("Unexpected splitting reached")
        self.assertEqual(len(fileList), 10)

        return

    def testLocationSplit(self):
        """

        _testLocationSplit_

        This should test whether or not the FileBased algorithm understands that files at seperate sites
        cannot be in the same jobGroup (this is the current standard).

        """
        myThread = threading.currentThread()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleSiteSubscription)

        jobGroups = jobFactory(files_per_job=10,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 2)
        self.assertEqual(len(jobGroups[0].jobs), 1)
        self.assertEqual(jobGroups[0].jobs[0]["estimatedMemoryUsage"], 2300)
        self.assertEqual(jobGroups[0].jobs[0]["estimatedDiskUsage"],
                         100 * 400 * 5)
        self.assertEqual(jobGroups[0].jobs[0]["estimatedJobTime"],
                         100 * 12 * 5)
        self.assertEqual(len(jobGroups[1].jobs[0].getFiles()), 5)
        self.assertEqual(jobGroups[1].jobs[0]["estimatedMemoryUsage"], 2300)
        self.assertEqual(jobGroups[1].jobs[0]["estimatedDiskUsage"],
                         100 * 400 * 5)
        self.assertEqual(jobGroups[1].jobs[0]["estimatedJobTime"],
                         100 * 12 * 5)

        return

    def testLimit(self):
        """
        _testLimit_

        Test what happens when you limit the number of files loaded.
        This should run each file in a separate loop, creating one
        jobGroup with one job containing one file (the limit arguments
        control this behaviour).
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=10,
                               limit_file_loading=True,
                               file_load_limit=1,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 1)

        return

    def testRespectRunBoundaries(self):
        """
        _testRespectRunBoundaries_

        Test whether the FileBased splitter respects run boundaries.
        """

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=10,
                               respect_run_boundaries=True,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 10)

        return

    def test_getParents(self):
        """
        _getParents_

        Check that we can retrieve file parentage, as the TwoFileBased
        splitter does.
        """
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleFileSubscription)

        jobGroups = jobFactory(files_per_job=2,
                               include_parents=True,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        self.assertEqual(len(jobGroups[0].jobs), 5)

        fileList = []
        for job in jobGroups[0].jobs:
            self.assertEqual(len(job.getFiles()), 2)
            for file in job.getFiles(type="lfn"):
                fileList.append(file)
        self.assertEqual(len(fileList), 10)

        for j in jobGroups[0].jobs:
            for f in j['input_files']:
                self.assertEqual(len(f['parents']), 1)
                self.assertEqual(list(f['parents'])[0]['lfn'], '/parent/lfn/')

        return

    def testZ_randomCrapForGenerators(self):
        """
        Either this works, and all other tests are obsolete, or it doesn't and they aren't.
        Either way, don't screw around with this.
        """
        def runCode(self, jobFactory):

            func = self.crazyAssFunction(jobFactory=jobFactory,
                                         file_load_limit=500)

            startTime = time.time()
            goFlag = True
            while goFlag:
                try:
                    res = next(func)
                    self.jobGroups.extend(res)
                except StopIteration:
                    goFlag = False

            stopTime = time.time()
            return self.jobGroups

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.multipleSiteSubscription)

        jobFactory.open()
        jobGroups = []

        a = self.crazyAssFunction(jobFactory=jobFactory, file_load_limit=2)

        for x in range(7):
            try:
                res = next(a)
                jobGroups.extend(res)
            except StopIteration:
                continue

        jobFactory.close()

        self.assertEqual(len(jobGroups), 6)
        for group in jobGroups:
            self.assertTrue(len(group.jobs) in [1, 2])
            for job in group.jobs:
                self.assertTrue(len(job['input_files']) in (1, 2))
                for file in job['input_files']:
                    self.assertTrue(file['locations'] in [
                        set(['somese.cern.ch']),
                        set(['otherse.cern.ch', 'somese.cern.ch'])
                    ])

        self.jobGroups = []

        subscript = self.createLargeFileBlock()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS", subscription=subscript)

        jobFactory.open()

        runCode(self, jobFactory)
        #cProfile.runctx("runCode(self, jobFactory)", globals(), locals(), "coroutine.stats")

        jobGroups = self.jobGroups

        self.assertEqual(len(jobGroups), 10)
        for group in jobGroups:
            self.assertEqual(len(group.jobs), 500)
            self.assertTrue(group.exists() > 0)

        jobFactory.close()
        return

    def crazyAssFunction(self, jobFactory, file_load_limit=1):
        groups = ['test']
        while groups != []:
            groups = jobFactory(files_per_job=1,
                                file_load_limit=file_load_limit,
                                performance=self.performanceParams)
            yield groups
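
The job counts asserted throughout FileBasedTest follow from one simple rule: FileBased splitting packs files_per_job consecutive files into each job, with a short final job for any remainder. A toy model of just that rule (not the WMCore implementation, which also groups files by location):

def file_based_split(files, files_per_job):
    """Chunk the file list into consecutive groups of files_per_job."""
    return [files[i:i + files_per_job]
            for i in range(0, len(files), files_per_job)]

assert [len(j) for j in file_based_split(list(range(10)), 2)] == [2, 2, 2, 2, 2]
assert [len(j) for j in file_based_split(list(range(10)), 3)] == [3, 3, 3, 1]
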
Ejemplo n.º 43
0
    def createTestJobGroup(self,
                           name="TestWorkthrough",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           nJobs=10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation,
                                owner="Simon",
                                name=name,
                                task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn=makeUUID(), size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn=makeUUID(), size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        report = Report()
        if error:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "badBackfillJobReport.pkl")
        else:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "PerformanceReport2.pkl")
        report.load(filename=path)

        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
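
The propagate() calls above walk every job down the retry-exhaustion path in a fixed order (the snippet attaches the FWJR between 'complete' and 'jobfailed'). As a hedged sketch, assuming the same ChangeState API used in the snippet, the chain can be replayed from a transition table:

TRANSITIONS = [("created", "new"), ("executing", "created"),
               ("complete", "executing"), ("jobfailed", "complete"),
               ("retrydone", "jobfailed"), ("exhausted", "retrydone"),
               ("cleanout", "exhausted")]

def driveToCleanout(changeState, jobs):
    """Replay the state transitions from the snippet above, in order."""
    for newState, oldState in TRANSITIONS:
        changeState.propagate(jobs, newState, oldState)
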
Ejemplo n.º 44
0
    def databaseWork(self):
        """
        Query the DB for all watched filesets; when a fileset matching a
        managed workflow becomes available, create the subscription.
        """
        # Get all watched workflows
        availableWorkflows = self.getUnsubscribedWorkflows.execute()
        logging.debug("Found %s unsubscribed managed workflows" \
              % len(availableWorkflows))

        # Get all filesets to check if they match a workflow
        availableFilesets = self.getAllFilesets.execute()

        logging.debug("Found %s filesets" % len(availableFilesets))

        # Loop on unsubscribed workflows to match filesets
        for managedWorkflow in availableWorkflows:

            # Workflow object cache to pass into Subscription constructor
            wfObj = None

            for fileset in availableFilesets:

                # Fileset object cache
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWorkflow['fileset_match'], fileset['name']):

                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (fileset['name'], managedWorkflow['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id=fileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id=managedWorkflow['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj, \
                                     workflow = wfObj, \
                                     #whitelist = whitelist, \
                                     #blacklist = blacklist, \
                                     split_algo = managedWorkflow['split_algo'],
                                     type = managedWorkflow['type'])
                    newSub.create()

        managedWorkflows = self.getManagedWorkflows.execute()
        logging.debug("Found %s  managed workflows" \
              % len(managedWorkflows))

        unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
        logging.debug("Found %s unsubscribed filesets" % \
                len(unsubscribedFilesets))

        # Loop on unsubscribed filesets to match workflows
        for unsubscribedFileset in unsubscribedFilesets:

            # Workflow object cache to pass into Subscription constructor
            # FIXME
            wfObj = None

            for managedWork in managedWorkflows:

                logging.debug("The workflow %s" % managedWork['workflow'])

                # Workflow and fileset object caches
                wfObj = None
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWork['fileset_match'], \
                     unsubscribedFileset['name']):
                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (unsubscribedFileset['name'], \
                          managedWork['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id=unsubscribedFileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id=managedWork['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj, \
                                     workflow = wfObj, \
                                     #whitelist = whitelist, \
                                     #blacklist = blacklist, \
                                     split_algo = managedWork['split_algo'],
                                     type = managedWork['type'])
                    newSub.create()
                    newSub.load()
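
Both loops above use the same matching rule: a managed workflow's fileset_match is treated as a regular expression tested against the fileset name with re.match, i.e. anchored at the start of the name. A self-contained sketch with made-up data:

import re

managed = [{'workflow': 1, 'fileset_match': r'/Cosmics/.*RECO'}]
filesets = [{'id': 7, 'name': '/Cosmics/Run2010-v1/RECO'},
            {'id': 8, 'name': '/MinBias/Run2010-v1/RAW'}]

pairs = [(wf['workflow'], fs['id'])
         for wf in managed for fs in filesets
         if re.match(wf['fileset_match'], fs['name'])]
assert pairs == [(1, 7)]  # only the matching fileset gets a subscription
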
Ejemplo n.º 45
0
    def createTestJobGroup(self, nJobs=10, retry_count=1,
                           workloadPath='test', fwjrPath=None,
                           workloadName=makeUUID(),
                           fileModifier=''):
        """
        Creates a group of several jobs
        """


        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops",
                                name=workloadName, task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier, size=1024, events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('T2_CH_CERN')

        testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10,
                         first_event=88)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('T2_CH_CERN')

        testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10,
                         first_event=88)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('T2_CH_CERN')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
        testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()


        testJobGroup.commit()


        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup
Ejemplo n.º 46
0
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create  the subscription for multiple top task (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper.createSubscription(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name="ResubmitTestWorkload",
                                   task="/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
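
The fileset and task names checked above follow from how truncate() re-roots task paths in this test: the subtree below the truncation point is grafted under the new workload name. A toy illustration (not WMCore code) of that path rewrite:

def reroot(taskPath, truncationPoint, newWorkload):
    """Re-root taskPath so truncationPoint becomes a top-level task."""
    assert taskPath.startswith(truncationPoint)
    suffix = taskPath[len(truncationPoint):]
    tail = truncationPoint.rsplit("/", 1)[-1]
    return "/%s/%s%s" % (newWorkload, tail, suffix)

assert reroot("/TestWorkload/ProcessingTask/MergeTask/SkimTask",
              "/TestWorkload/ProcessingTask/MergeTask",
              "ResubmitTestWorkload") == "/ResubmitTestWorkload/MergeTask/SkimTask"
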
Ejemplo n.º 47
0
    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert dummy filesets, files, workflows and subscriptions into WMBS
        to test merge job creation.  Two filesets are filled with the same
        files: one feeds a ParentlessMergeBySize subscription and the other
        is a bogus control fileset.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", pnn="somese.cern.ch")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                                 name=name, task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        dummySubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")

        file1 = File(lfn="file1", size=1024, events=1024, first_event=0,
                     locations={"somese.cern.ch"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4", size=1024, events=1024, first_event=3072, locations={"somese.cern.ch"})
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI", size=1024, events=1024, first_event=0, locations={"somese.cern.ch"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII", size=1024, events=1024, first_event=1024, locations={"somese.cern.ch"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII", size=1024, events=1024, first_event=2048, locations={"somese.cern.ch"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV", size=1024 * 1000000, events=1024, first_event=3072, locations={"somese.cern.ch"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII, fileIII, fileIV]:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return
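
The mix of ten 1KB files plus one oversized fileIV is what exercises size-based merging: small files accumulate into one merge job until a size cap is hit, and an oversized file is merged on its own. An illustrative toy (not the ParentlessMergeBySize implementation):

def merge_by_size(files, max_size):
    """Group (lfn, size) pairs, flushing when the next file would overflow."""
    groups, current, total = [], [], 0
    for lfn, size in files:
        if current and total + size > max_size:
            groups.append(current)
            current, total = [], 0
        current.append(lfn)
        total += size
    if current:
        groups.append(current)
    return groups

print(merge_by_size([("file1", 1024), ("file2", 1024),
                     ("fileIV", 1024 * 1000000)], max_size=4096))
# -> [['file1', 'file2'], ['fileIV']]
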
Ejemplo n.º 48
0
class WMBSHelperTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump")
        self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump")
        self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache")
        os.environ["COUCHDB"] = "wmbshelper_t"
        self.testInit.setSchema(customModules=["WMCore.WMBS",
                                               "WMComponent.DBS3Buffer",
                                               "WMCore.BossAir",
                                               "WMCore.ResourceControl"],
                                useDefault=False)

        self.workDir = self.testInit.generateWorkDir()

        self.wmspec = self.createWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = MockDBSReader(self.inputDataset.dbsurl)
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=threading.currentThread().logger,
                                     dbinterface=threading.currentThread().dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out the database.
        """
        self.testInit.clearDatabase()
        self.testInit.tearDownCouch()
        self.testInit.delWorkDir()
        return

    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task and
        a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create jobs
        for each subscription in various states.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \
                                        maxSlots = 10000, pendingSlots = 10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN,
                           group_name='DEFAULT',
                           role_name='DEFAULT')

        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1",
                                    owner="Steve",
                                    name="Main",
                                    task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1",
                                         owner="Steve",
                                         name="Main",
                                         task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1",
                                       owner="Steve",
                                       name="Main",
                                       task="Cleanup")
        mainCleanupWorkflow.create()

        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [
            self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
            self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
            self.cleanupJobC
        ]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2",
                                 owner="Steve",
                                 name="Bogus",
                                 task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return

    def verifyFileKillStatus(self):
        """
        _verifyFileKillStatus_

        Verify that all files were killed correctly.  The status of files in
        Cleanup and LogCollect subscriptions isn't modified.  Status of
        already completed and failed files is not modified.  Also verify that
        the bogus subscription is untouched.
        """
        failedFiles = self.mainProcSub.filesOfStatus("Failed")
        acquiredFiles = self.mainProcSub.filesOfStatus("Acquired")
        completedFiles = self.mainProcSub.filesOfStatus("Completed")
        availableFiles = self.mainProcSub.filesOfStatus("Available")
        bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired")

        self.assertEqual(len(availableFiles), 0, \
                         "Error: There should be no available files.")
        self.assertEqual(len(acquiredFiles), 0, \
                         "Error: There should be no acquired files.")
        self.assertEqual(len(bogusAcquiredFiles), 1, \
                         "Error: There should be one acquired file.")

        self.assertEqual(len(completedFiles), 3, \
                         "Error: There should be three completed files.")
        goldenLFNs = ["lfnA", "lfnB", "lfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs, \
                          "Error: Extra completed file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(failedFiles), 0, \
                         "Error: There should be no failed files.")

        self.assertEqual(len(goldenLFNs), 0, \
                         "Error: Missing LFN")

        failedFiles = self.mainMergeSub.filesOfStatus("Failed")
        acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired")
        completedFiles = self.mainMergeSub.filesOfStatus("Completed")
        availableFiles = self.mainMergeSub.filesOfStatus("Available")

        self.assertEqual(len(acquiredFiles), 0, \
                         "Error: Merge subscription should have 0 acquired files.")
        self.assertEqual(len(availableFiles), 0, \
                         "Error: Merge subscription should have 0 available files.")

        self.assertEqual(len(failedFiles), 1, \
                         "Error: Merge subscription should have 1 failed file.")
        self.assertEqual(
            list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.")

        self.assertEqual(len(completedFiles), 2, \
                         "Error: Merge subscription should have 2 completed files.")
        goldenLFNs = ["ulfnA", "ulfnC"]
        for completedFile in completedFiles:
            self.assertTrue(completedFile["lfn"] in goldenLFNs, \
                          "Error: Extra complete file.")
            goldenLFNs.remove(completedFile["lfn"])

        self.assertEqual(len(goldenLFNs), 0, \
                         "Error: Missing LFN")

        failedFiles = self.mainCleanupSub.filesOfStatus("Failed")
        acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired")
        completedFiles = self.mainCleanupSub.filesOfStatus("Completed")
        availableFiles = self.mainCleanupSub.filesOfStatus("Available")

        self.assertEqual(len(failedFiles), 0, \
                         "Error: Cleanup subscription should have 0 failed files.")

        self.assertEqual(len(acquiredFiles), 1, \
                         "Error: There should be only one acquired file.")
        self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \
                         "Error: Wrong acquired LFN.")

        self.assertEqual(len(completedFiles), 1, \
                         "Error: There should be only one completed file.")
        self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \
                         "Error: Wrong completed LFN.")

        self.assertEqual(len(availableFiles), 1, \
                         "Error: There should be only one available file.")
        self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \
                         "Error: Wrong available LFN.")

        return

    def verifyJobKillStatus(self):
        """
        _verifyJobKillStatus_

        Verify that jobs are killed correctly.  Jobs belonging to Cleanup and
        LogCollect subscriptions are not killed.  The status of jobs that have
        already finished running is not changed.
        """
        self.procJobA.load()
        self.procJobB.load()
        self.procJobC.load()

        self.assertEqual(self.procJobA["state"], "killed", \
                         "Error: Proc job A should be killed.")
        self.assertEqual(self.procJobB["state"], "killed", \
                         "Error: Proc job B should be killed.")
        self.assertEqual(self.procJobC["state"], "complete", \
                         "Error: Proc job C should be complete.")

        self.mergeJobA.load()
        self.mergeJobB.load()
        self.mergeJobC.load()

        self.assertEqual(self.mergeJobA["state"], "exhausted", \
                         "Error: Merge job A should be exhausted.")
        self.assertEqual(self.mergeJobB["state"], "cleanout", \
                         "Error: Merge job B should be cleanout.")
        self.assertEqual(self.mergeJobC["state"], "killed", \
                         "Error: Merge job C should be killed.")

        self.cleanupJobA.load()
        self.cleanupJobB.load()
        self.cleanupJobC.load()

        self.assertEqual(self.cleanupJobA["state"], "new", \
                         "Error: Cleanup job A should be new.")
        self.assertEqual(self.cleanupJobB["state"], "executing", \
                         "Error: Cleanup job B should be executing.")
        self.assertEqual(self.cleanupJobC["state"], "complete", \
                         "Error: Cleanup job C should be complete.")
        return
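
The job-side policy mirrors the file-side one: jobs in Cleanup (and LogCollect) subscriptions are spared, and jobs already in a terminal state keep it. A toy sketch of the rule as the assertions above apply it (the helper and the state sets are illustrative, not WMCore code):

PROTECTED_TASK_TYPES = set(["Cleanup", "LogCollect"])
TERMINAL_STATES = set(["complete", "exhausted", "cleanout", "killed"])

def stateAfterKill(taskType, currentState):
    # Spared task types and finished jobs keep their state; the rest die.
    if taskType in PROTECTED_TASK_TYPES or currentState in TERMINAL_STATES:
        return currentState
    return "killed"

assert stateAfterKill("Processing", "executing") == "killed"
assert stateAfterKill("Processing", "complete") == "complete"
assert stateAfterKill("Cleanup", "executing") == "executing"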

    def createTestWMSpec(self):
        """
        _createTestWMSpec_

        Create a WMSpec with processing, merge, cleanup and skim tasks that
        can be used by the subscription creation test.
        """
        testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        testWorkload.setDashboardActivity("TestReReco")
        testWorkload.setSpecUrl("/path/to/workload")
        testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'})

        procTask = testWorkload.newTask("ProcessingTask")
        procTask.setTaskType("Processing")
        procTask.setSplittingAlgorithm("FileBased", files_per_job=1)
        procTaskCMSSW = procTask.makeStep("cmsRun1")
        procTaskCMSSW.setStepType("CMSSW")
        procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper()
        procTask.setTaskType("Processing")
        procTask.setSiteWhitelist(["site1"])
        procTask.setSiteBlacklist(["site2"])
        procTask.applyTemplates()

        procTaskCMSSWHelper.addOutputModule("OutputA",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)

        mergeTask = procTask.addTask("MergeTask")
        mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA")
        mergeTask.setTaskType("Merge")
        mergeTask.setSplittingAlgorithm("WMBSMergeBySize",
                                        min_merge_size=1,
                                        max_merge_size=2,
                                        max_merge_events=3)
        mergeTaskCMSSW = mergeTask.makeStep("cmsRun1")
        mergeTaskCMSSW.setStepType("CMSSW")
        mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper()
        mergeTask.setTaskType("Merge")
        mergeTask.applyTemplates()

        mergeTaskCMSSWHelper.addOutputModule("Merged",
                                             primaryDataset="bogusPrimary",
                                             processedDataset="bogusProcessed",
                                             dataTier="DataTierA",
                                             lfnBase="bogusUnmerged",
                                             mergedLFNBase="bogusMerged",
                                             filterName=None)

        cleanupTask = procTask.addTask("CleanupTask")
        cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA")
        cleanupTask.setTaskType("Cleanup")
        cleanupTask.setSplittingAlgorithm("SiblingProcessingBase",
                                          files_per_job=50)
        cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1")
        cleanupTaskCMSSW.setStepType("CMSSW")
        cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper()
        cleanupTask.setTaskType("Cleanup")
        cleanupTask.applyTemplates()

        skimTask = mergeTask.addTask("SkimTask")
        skimTask.setTaskType("Skim")
        skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged")
        skimTask.setSplittingAlgorithm("FileBased",
                                       files_per_job=1,
                                       include_parents=True)
        skimTaskCMSSW = skimTask.makeStep("cmsRun1")
        skimTaskCMSSW.setStepType("CMSSW")
        skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper()
        skimTask.setTaskType("Skim")
        skimTask.applyTemplates()

        skimTaskCMSSWHelper.addOutputModule("SkimOutputA",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)

        skimTaskCMSSWHelper.addOutputModule("SkimOutputB",
                                            primaryDataset="bogusPrimary",
                                            processedDataset="bogusProcessed",
                                            dataTier="DataTierA",
                                            lfnBase="bogusUnmerged",
                                            mergedLFNBase="bogusMerged",
                                            filterName=None)
        return testWorkload

    def setupMCWMSpec(self):
        """Setup MC workflow"""
        self.wmspec = self.createMCWMSpec()
        self.topLevelTask = getFirstTask(self.wmspec)
        self.inputDataset = self.topLevelTask.inputDataset()
        self.dataset = self.topLevelTask.getInputDatasetPath()
        self.dbs = None
        self.siteDB = fakeSiteDB()

        # add sites that would normally be added by operator via resource_control
        locationDAO = self.daoFactory(classname="Locations.New")
        self.ses = []
        for site in ['T2_XX_SiteA', 'T2_XX_SiteB']:
            locationDAO.execute(siteName=site,
                                seName=self.siteDB.cmsNametoSE(site))
            self.ses.append(self.siteDB.cmsNametoSE(site))

    def createWMSpec(self, name='ReRecoWorkload'):
        wmspec = rerecoWorkload(name, rerecoArgs)
        wmspec.setSpecUrl("/path/to/workload")
        return wmspec

    def createMCWMSpec(self, name='MonteCarloWorkload'):
        wmspec = monteCarloWorkload(name, mcArgs)
        wmspec.setSpecUrl("/path/to/workload")
        getFirstTask(wmspec).addProduction(totalevents=10000)
        return wmspec

    def getDBS(self, wmspec):
        topLevelTask = getFirstTask(wmspec)
        inputDataset = topLevelTask.inputDataset()
        dbs = MockDBSReader(inputDataset.dbsurl)
        #dbsDict = {self.inputDataset.dbsurl : self.dbs}
        return dbs

    def createWMBSHelperWithTopTask(self,
                                    wmspec,
                                    block,
                                    mask=None,
                                    parentFlag=False,
                                    detail=False):

        topLevelTask = getFirstTask(wmspec)

        wmbs = WMBSHelper(wmspec,
                          topLevelTask.name(),
                          block,
                          mask,
                          cachepath=self.workDir)
        if block:
            if parentFlag:
                block = self.dbs.getFileBlockWithParents(block)[block]
            else:
                block = self.dbs.getFileBlock(block)[block]
        sub, files = wmbs.createSubscriptionAndAddFiles(block=block)
        if detail:
            return wmbs, sub, files
        else:
            return wmbs

    def testKillWorkflow(self):
        """
        _testKillWorkflow_

        Verify that workflow killing works correctly.
        """
        configFile = EmulatorSetup.setupWMAgentConfig()

        config = loadConfigurationFile(configFile)

        baAPI = BossAirAPI(config=config)

        # Create nine jobs
        self.setupForKillTest(baAPI=baAPI)
        self.assertEqual(len(baAPI._listRunJobs()), 9)
        killWorkflow("Main", config, config)

        self.verifyFileKillStatus()
        self.verifyJobKillStatus()
        self.assertEqual(len(baAPI._listRunJobs()), 8)

        EmulatorSetup.deleteConfig(configFile)
        return

    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
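
The fileset names asserted throughout this test follow one convention: unmerged output lives under the producing task as unmerged-<module>, merged output under the corresponding merge task as merged-<module>. A short sketch that makes the derivation explicit (hypothetical helpers, not WMBSHelper API):

def unmergedFilesetName(taskPath, outputModule):
    return "%s/unmerged-%s" % (taskPath, outputModule)

def mergedFilesetName(mergeTaskPath, mergeModule="Merged"):
    return "%s/merged-%s" % (mergeTaskPath, mergeModule)

assert unmergedFilesetName("/TestWorkload/ProcessingTask", "OutputA") == \
       "/TestWorkload/ProcessingTask/unmerged-OutputA"
assert mergedFilesetName("/TestWorkload/ProcessingTask/MergeTask") == \
       "/TestWorkload/ProcessingTask/MergeTask/merged-Merged"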

    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscriptions for the multiple top-level tasks (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper.createSubscription(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name="ResubmitTestWorkload",
                                   task="/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
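
Path-wise, truncation amounts to re-rooting the surviving task paths directly under the new workload name, dropping the ancestor segments; every assertion above relies on that rewrite. A sketch of it (the helper is illustrative, not the WMWorkloadHelper.truncate implementation):

def rerootTaskPath(taskPath, droppedAncestors, newWorkload):
    # "/TestWorkload/ProcessingTask/MergeTask" -> "/ResubmitTestWorkload/MergeTask"
    assert taskPath.startswith(droppedAncestors + "/")
    return "/" + newWorkload + taskPath[len(droppedAncestors):]

assert rerootTaskPath("/TestWorkload/ProcessingTask/MergeTask/SkimTask",
                      "/TestWorkload/ProcessingTask",
                      "ResubmitTestWorkload") == \
       "/ResubmitTestWorkload/MergeTask/SkimTask"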


#    def testProduction(self):
#        """Production workflow"""
#        pass

    def testReReco(self):
        """ReReco workflow"""
        # create workflow
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block))
        self.assertEqual(len(files), 1)

    def testReRecoBlackRunRestriction(self):
        """ReReco workflow with Run restrictions"""
        block = self.dataset + "#2"
        #add run blacklist
        self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)

        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testReRecoWhiteRunRestriction(self):
        block = self.dataset + "#2"
        # Run Whitelist
        self.topLevelTask.setInputRunWhitelist([2])
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock())

    def testLumiMaskRestrictionsOK(self):
        block = self.dataset + "#1"
        self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1']
        self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1']
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock())

    def testLumiMaskRestrictionsKO(self):
        block = self.dataset + "#1"
        self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [
            '123454321'
        ]
        self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [
            '123,123'
        ]
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files'])
        self.assertEqual(len(files), 0)

    def testDuplicateFileInsert(self):
        # using default wmspec
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        # check initially inserted files.
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))
        firstFileset = wmbs.topLevelFileset
        wmbsDao = wmbs.daofactory(classname="Files.InFileset")

        numOfFiles = len(wmbsDao.execute(firstFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        # use the new spec with same inputdataset
        block = self.dataset + "#1"
        wmspec = self.createWMSpec("TestSpec1")
        dbs = self.getDBS(wmspec)
        wmbs = self.createWMBSHelperWithTopTask(wmspec, block)
        # check duplicate insert
        dbsFiles = dbs.getFileBlock(block)[block]['Files']
        numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block])
        self.assertEqual(numOfFiles, 0)
        secondFileset = wmbs.topLevelFileset

        wmbsDao = wmbs.daofactory(classname="Files.InFileset")
        numOfFiles = len(wmbsDao.execute(secondFileset.id))
        self.assertEqual(numOfFiles, len(dbsFiles))

        self.assertNotEqual(firstFileset.id, secondFileset.id)
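
What the test pins down is insert-time deduplication: a second spec over the same block inserts zero new files, yet still gets a fileset of its own referencing them. Sketched with a set (illustrative only):

knownLFNs = set()

def addFiles(lfns):
    # Insert only LFNs not seen before; report how many were actually new.
    newOnes = [lfn for lfn in lfns if lfn not in knownLFNs]
    knownLFNs.update(newOnes)
    return len(newOnes)

assert addFiles(["lfn1", "lfn2"]) == 2  # first spec inserts the block
assert addFiles(["lfn1", "lfn2"]) == 0  # second spec: all duplicates skipped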

    def testDuplicateSubscription(self):
        """Can't duplicate subscriptions"""
        # using default wmspec
        block = self.dataset + "#1"
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        filesetId = wmbs.topLevelFileset.id
        subId = wmbs.topLevelSubscription['id']

        # check initially inserted files.
        dbsFiles = self.dbs.getFileBlock(block)[block]['Files']
        self.assertEqual(numOfFiles, len(dbsFiles))
        firstFileset = wmbs.topLevelFileset
        self.assertEqual(numOfFiles, len(dbsFiles))

        # reinsert subscription - shouldn't create anything new
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block)
        wmbs.topLevelFileset.loadData()
        self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
        self.assertEqual(filesetId, wmbs.topLevelFileset.id)
        self.assertEqual(subId, wmbs.topLevelSubscription['id'])

        # now do a montecarlo workflow
        self.setupMCWMSpec()
        mask = Mask(FirstRun=12,
                    FirstLumi=1234,
                    FirstEvent=12345,
                    LastEvent=999995,
                    LastLumi=12345,
                    LastRun=12)

        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        wmbs.topLevelFileset.loadData()
        numOfFiles = len(wmbs.topLevelFileset.files)
        filesetId = wmbs.topLevelFileset.id
        subId = wmbs.topLevelSubscription['id']

        # check initially inserted files.
        numDbsFiles = 1
        self.assertEqual(numOfFiles, numDbsFiles)
        firstFileset = wmbs.topLevelFileset
        self.assertEqual(numOfFiles, numDbsFiles)

        # reinsert subscription - shouldn't create anything new
        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        wmbs.topLevelFileset.loadData()
        self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files))
        self.assertEqual(filesetId, wmbs.topLevelFileset.id)
        self.assertEqual(subId, wmbs.topLevelSubscription['id'])

    def testParentage(self):
        """
        1. Check whether parent files are created in WMBS.
        2. Check that parent files are associated to their children.
        3. When two specs with the same input data (one with parent processing,
           one without) are inserted, and the one without parent processing goes
           in first, the second insert still needs to create the parent files
           even though the child files are duplicates.
        """
        block = self.dataset + "#1"
        wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(
            self.wmspec, block, parentFlag=False, detail=True)
        # file creation without parents
        self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles)
        wmbs.topLevelFileset.loadData()

        for child in wmbs.topLevelFileset.files:
            # no parent per child
            self.assertEqual(len(child["parents"]), 0)

        wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec,
                                                               block,
                                                               parentFlag=True,
                                                               detail=True)
        self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles)

        wmbs.topLevelFileset.loadData()

        for child in wmbs.topLevelFileset.files:
            # one parent per child
            self.assertEqual(len(child["parents"]), 1)
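
Point 3 of the docstring is the subtle case: the second, parent-aware insert must attach parents even though every child LFN is already known. With plain dicts (an illustration, not the WMBS schema):

files = {}  # lfn -> set of parent LFNs

def insertChildren(children, parentsOf=None):
    # Duplicate children are tolerated; parents are attached when requested.
    for lfn in children:
        files.setdefault(lfn, set())
        if parentsOf:
            files[lfn].update(parentsOf[lfn])

insertChildren(["childA"])  # the parent-less spec goes in first
assert files["childA"] == set()
insertChildren(["childA"], parentsOf={"childA": set(["parentA"])})
assert files["childA"] == set(["parentA"])  # parents attached anyway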

    def testMCFakeFileInjection(self):
        """Inject fake Monte Carlo files into WMBS"""
        self.setupMCWMSpec()

        mask = Mask(FirstRun=12,
                    FirstLumi=1234,
                    FirstEvent=12345,
                    LastEvent=999995,
                    LastLumi=12345,
                    LastRun=12)

        wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask)
        subscription = wmbs.topLevelSubscription
        self.assertEqual(1, subscription.exists())
        fileset = subscription['fileset']
        self.assertEqual(1, fileset.exists())
        fileset.loadData()  # need to refresh from database

        self.assertEqual(len(fileset.files), 1)
        self.assertEqual(len(fileset.parents), 0)
        self.assertFalse(fileset.open)

        file = list(fileset.files)[0]
        self.assertEqual(file['events'], mask['LastEvent'] -
                         mask['FirstEvent'] + 1)  # inclusive range
        self.assertEqual(file['merged'],
                         False)  # merged files get added to dbs
        self.assertEqual(len(file['parents']), 0)
        #file.loadData()
        self.assertEqual(sorted(file['locations']), sorted(self.ses))
        self.assertEqual(len(file.getParentLFNs()), 0)

        self.assertEqual(len(file.getRuns()), 1)
        run = file.getRuns()[0]
        self.assertEqual(run.run, mask['FirstRun'])
        self.assertEqual(run.lumis[0], mask['FirstLumi'])
        self.assertEqual(run.lumis[-1], mask['LastLumi'])
        self.assertEqual(len(run.lumis),
                         mask['LastLumi'] - mask['FirstLumi'] + 1)
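
The two "+ 1" terms in the last assertions are plain inclusive-range arithmetic over the mask; with the mask values used here:

firstEvent, lastEvent = 12345, 999995
firstLumi, lastLumi = 1234, 12345
assert lastEvent - firstEvent + 1 == 987651  # events in the fake MC file
assert lastLumi - firstLumi + 1 == 11112     # lumis in its single run
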
Example No. 49
    def testJobSummary(self):
        """
        _testJobSummary_

        Verify that the job summary for jobs with a FWJR is created correctly
        and that its status is updated when the updatesummary flag is enabled.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
            "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Production"

        change.propagate([testJobA], 'created', 'new')
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)

        change.propagate([testJobA], 'executing', 'created')
        testJobA["fwjr"] = myReport
        change.propagate([testJobA], 'jobfailed', 'executing')

        changeStateDB = self.couchServer.connectDatabase(
            dbname=self.config.JobStateMachine.jobSummaryDBName)
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        fwjrDoc = {'state': None}
        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/WMStats":
                fwjrDoc = changeStateDB.document(resultRow["id"])
                break

        self.assertEqual(fwjrDoc['state'], 'jobfailed',
                         "Error: summary doesn't have the expected job state")

        del testJobA["fwjr"]

        change.propagate([testJobA],
                         'jobcooloff',
                         'jobfailed',
                         updatesummary=True)
        return
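
The final propagate call exercises the updatesummary flag, which per the docstring makes the summary document's state follow the job on transitions that would otherwise leave it alone. A toy sketch of that contract (illustrative, not the ChangeState internals):

def propagateSummary(summaryDoc, newState, updatesummary=False):
    # Only transitions flagged with updatesummary move the summary state.
    if updatesummary:
        summaryDoc["state"] = newState

doc = {"state": "jobfailed"}
propagateSummary(doc, "jobcooloff")
assert doc["state"] == "jobfailed"   # plain transition: summary untouched
propagateSummary(doc, "jobcooloff", updatesummary=True)
assert doc["state"] == "jobcooloff"  # flagged transition: summary follows
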
Example No. 50
    def testIndexConflict(self):
        """
        _testIndexConflict_

        Verify that in case of a conflict in the job index we discard the
        old document and replace it with a new one. This only works with
        the MySQL backend.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
            "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "CompOps"
        testJobA["taskType"] = "Processing"

        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)

        testJobA["fwjr"] = myReport
        change.propagate([testJobA], 'created', 'new')

        jobdatabase = self.couchServer.connectDatabase('changestate_t/jobs',
                                                       False)
        fwjrdatabase = self.couchServer.connectDatabase(
            'changestate_t/fwjrs', False)
        jobDoc = jobdatabase.document("1")
        fwjrDoc = fwjrdatabase.document("1-0")
        self.assertEqual(jobDoc["workflow"], "wf001",
                         "Wrong workflow in couch job document")
        self.assertEqual(fwjrDoc["fwjr"]["task"], self.taskName,
                         "Wrong task in fwjr couch document")

        testJobA.delete()

        myThread = threading.currentThread()
        myThread.dbi.processData("ALTER TABLE wmbs_job AUTO_INCREMENT = 1")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf002",
                                task="/TestWorkflow/Test2")
        testWorkflow.create()
        testFileset = Fileset(name="TestFilesetB")
        testFileset.create()

        testFile = File(lfn="SomeLFNB", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        testJobB = jobGroup.jobs[0]
        testJobB["user"] = "******"
        testJobB["group"] = "CompOps"
        testJobB["taskType"] = "Processing"
        testJobB["fwjr"] = myReport

        change.propagate([testJobB], 'created', 'new')
        jobDoc = jobdatabase.document("1")
        fwjrDoc = fwjrdatabase.document("1-0")
        self.assertEqual(jobDoc["workflow"], "wf002",
                         "Job document was not overwritten")
        self.assertEqual(fwjrDoc["fwjr"]["task"], "/TestWorkflow/Test2",
                         "FWJR document was not overwritten")

        return
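
The policy being verified is that couch documents are keyed by the WMBS job id: once the auto-increment counter is reset and the id is reused, the old document must give way to the new job's data. As a plain-dict sketch (illustrative only):

jobDocs = {}

def upsertJobDoc(jobId, doc):
    jobDocs[jobId] = doc  # same id: the old document is simply replaced

upsertJobDoc(1, {"workflow": "wf001"})
upsertJobDoc(1, {"workflow": "wf002"})  # id reused after the counter reset
assert jobDocs[1]["workflow"] == "wf002"
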
Example No. 51
    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task and
        a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create jobs
        for each subscription in various states.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \
                                        maxSlots = 10000, pendingSlots = 10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN,
                           group_name='DEFAULT',
                           role_name='DEFAULT')

        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1",
                                    owner="Steve",
                                    name="Main",
                                    task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1",
                                         owner="Steve",
                                         name="Main",
                                         task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1",
                                       owner="Steve",
                                       name="Main",
                                       task="Cleanup")
        mainCleanupWorkflow.create()

        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [
            self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
            self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
            self.cleanupJobC
        ]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2",
                                 owner="Steve",
                                 name="Bogus",
                                 task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return
Example No. 52
    def testFWJRInputFileTruncation(self):
        """
        _testFWJRInputFileTruncation_

        Test and see whether the ChangeState code can
        be used to automatically truncate the number of input files
        in a FWJR

        Code stolen from the serialization test
        """

        self.config.JobStateMachine.maxFWJRInputFiles = 0
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        self.assertEqual(len(jobGroup.jobs), 1,
                         "Error: Splitting should have created one job.")

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"

        change.propagate([testJobA], 'created', 'new')
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)

        testJobA["fwjr"] = myReport

        change.propagate([testJobA], 'executing', 'created')

        changeStateDB = self.couchServer.connectDatabase(
            dbname="changestate_t/fwjrs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName")
        self.assertEqual(len(result["rows"]), 1,
                         "Error: Wrong number of rows.")
        for row in result["rows"]:
            couchJobDoc = changeStateDB.document(row["value"]["id"])
            self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"],
                             "Error: Rev is wrong.")

        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/FWJRDump":
                fwjrDoc = changeStateDB.document(resultRow["id"])
                break

        self.assertEqual(
            fwjrDoc["fwjr"]["steps"]['cmsRun1']['input']['source'], [])

        return
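
With maxFWJRInputFiles set to 0, the stored input-file list is cut to nothing before the FWJR document is uploaded, which is what the final assertion checks. The knob amounts to a list slice; a sketch (illustrative, not the ChangeState code):

def truncateInputFiles(inputFiles, maxFWJRInputFiles):
    return inputFiles[:maxFWJRInputFiles]

assert truncateInputFiles(["/store/a.root", "/store/b.root"], 0) == []
assert truncateInputFiles(["/store/a.root", "/store/b.root"], 1) == ["/store/a.root"]
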
Example No. 53
    def testJobKilling(self):
        """
        _testJobKilling_

        Test that we can successfully set jobs to the killed state
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(4):
            newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"]))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 4, \
            "Error: Splitting should have created four jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobC = jobGroup.jobs[2]
        testJobC["user"] = "******"
        testJobC["group"] = "DMWM"
        testJobC["taskType"] = "Processing"
        testJobD = jobGroup.jobs[3]
        testJobD["user"] = "******"
        testJobD["group"] = "DMWM"
        testJobD["taskType"] = "Processing"

        change.persist([testJobA], "created", "new")
        change.persist([testJobB], "jobfailed", "executing")
        change.persist([testJobC, testJobD], "executing", "created")

        change.persist([testJobA], "killed", "created")
        change.persist([testJobB], "killed", "jobfailed")
        change.persist([testJobC, testJobD], "killed", "executing")

        for job in [testJobA, testJobB, testJobC, testJobD]:
            job.load()
            self.assertEqual(job['retry_count'], 99999)
            self.assertEqual(job['state'], 'killed')

        return
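
The retry_count of 99999 asserted above is the terminal-kill convention: it sits far above any realistic retry limit (the limit below is hypothetical), so killed jobs are never picked up for resubmission:

MAX_RETRIES = 3  # hypothetical retry limit, for illustration only
killedJob = {"state": "killed", "retry_count": 99999}
assert killedJob["retry_count"] > MAX_RETRIES  # never eligible for retry
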
Example No. 54
    def testDuplicateJobReports(self):
        """
        _testDuplicateJobReports_

        Verify that everything works correctly if a job report is added to the
        database more than once.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
            "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"

        change.propagate([testJobA], 'created', 'new')
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)
        testJobA["fwjr"] = myReport

        change.propagate([testJobA], 'executing', 'created')
        change.propagate([testJobA], 'executing', 'created')

        changeStateDB = self.couchServer.connectDatabase(
            dbname="changestate_t/fwjrs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/FWJRDump":
                changeStateDB.document(resultRow["id"])
                break

        return
Example No. 55
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
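
A minimal sketch of the pattern this test exercises, reduced to its core calls. It assumes `testWorkload` is a workload helper built as in createTestWMSpec() and `workDir` is an existing cache directory; the WMBSHelper import path is the usual WMCore location, and the body of getFirstTask is an assumption about the helper this test uses.

from WMCore.WorkQueue.WMBSHelper import WMBSHelper

def getFirstTask(wmspec):
    # Assumed helper: return the first top-level task of the workload.
    return next(wmspec.taskIterator())

topLevelTask = getFirstTask(testWorkload)
wmbsHelper = WMBSHelper(testWorkload, topLevelTask.name(), "SomeBlock",
                        cachepath=workDir)
# Creates the fileset named "TestWorkload-ProcessingTask-SomeBlock" ...
wmbsHelper.createTopLevelFileset()
# ... and the subscriptions for the processing task and its child tasks.
wmbsHelper.createSubscription(topLevelTask, wmbsHelper.topLevelFileset)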
Ejemplo n.º 56
0
    def testJobSerialization(self):
        """
        _testJobSerialization_

        Verify that serialization of a job works when adding a FWJR.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
            "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"

        change.propagate([testJobA], 'created', 'new')
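        # Attach a pre-generated framework job report before the next transition.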
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)
        testJobA["fwjr"] = myReport

        change.propagate([testJobA], 'executing', 'created')

        changeStateDB = self.couchServer.connectDatabase(
            dbname="changestate_t/fwjrs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName")
        self.assertEqual(len(result["rows"]), 1,
                         "Error: Wrong number of rows.")
        for row in result["rows"]:
            couchJobDoc = changeStateDB.document(row["value"]["id"])
            self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"],
                             "Error: Rev is wrong.")

        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/FWJRDump":
                fwjrDoc = changeStateDB.document(resultRow["id"])
                break

        assert fwjrDoc["retrycount"] == 0, \
            "Error: Retry count is wrong."

        assert len(fwjrDoc["fwjr"]["steps"].keys()) == 2, \
            "Error: Wrong number of steps in FWJR."
        assert "cmsRun1" in fwjrDoc["fwjr"]["steps"].keys(), \
            "Error: cmsRun1 step is missing from FWJR."
        assert "stageOut1" in fwjrDoc["fwjr"]["steps"].keys(), \
            "Error: stageOut1 step is missing from FWJR."

        return
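
The FWJR-attachment step above can be summarized as follows; a minimal sketch, assuming `change` is the ChangeState instance from this test, `job` is a WMBS job already propagated to 'created', and the report path is hypothetical.

from WMCore.FwkJobReport.Report import Report

myReport = Report()
myReport.unpersist("/some/path/Report.pkl")  # load a pickled framework job report
job["fwjr"] = myReport  # serialized into changestate_t/fwjrs on the next transition
change.propagate([job], "executing", "created")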
Ejemplo n.º 57
0
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec=makeUUID(),
                                owner="Steve",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJobA = Job(name=makeUUID(), files=[])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group=testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name=makeUUID(), files=[])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group=testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name=makeUUID(), files=[])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group=testJobGroup)
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        changeStateAction.execute(jobs=[testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(
            classname="Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."

        return
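
A minimal sketch of the DAO invocation pattern used here, assuming the DAOFactory is wired to the current thread's database interface as in these tests' setUp().

import threading

from WMCore.DAOFactory import DAOFactory

myThread = threading.currentThread()
daoFactory = DAOFactory(package="WMCore.WMBS",
                        logger=myThread.logger,
                        dbinterface=myThread.dbi)

runningJobsAction = daoFactory(classname="Monitoring.ListRunningJobs")
for row in runningJobsAction.execute():
    # Per the assertions above, 'executing' and 'new' jobs are returned while
    # 'complete' ones are not; each row carries job_name, state, couch_record.
    assert row["state"] != "complete"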
Ejemplo n.º 58
0
    def testRecordInCouch(self):
        """
        _testRecordInCouch_

        Verify that jobs, state transitions and fwjrs are recorded correctly.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl,
                                owner="Steve",
                                name="wf001",
                                task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        testFileA = File(lfn="SomeLFNA",
                         events=1024,
                         size=2048,
                         locations=set(["T2_CH_CERN"]))
        testFileB = File(lfn="SomeLFNB",
                         events=1025,
                         size=2049,
                         locations=set(["T2_CH_CERN"]))
        testFileA.create()
        testFileB.create()

        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 2, \
            "Error: Splitting should have created two jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Merge"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"

        change.propagate([testJobA, testJobB], "new", "none")
        change.propagate([testJobA, testJobB], "created", "new")
        change.propagate([testJobA, testJobB], "executing", "created")

        testJobADoc = change.jobsdatabase.document(testJobA["couch_record"])

        for transition in testJobADoc["states"].itervalues():
            self.assertTrue(isinstance(transition["timestamp"], (int, long)))

        self.assertEqual(testJobADoc["jobid"], testJobA["id"],
                         "Error: ID parameter is incorrect.")
        assert testJobADoc["name"] == testJobA["name"], \
            "Error: Name parameter is incorrect."
        assert testJobADoc["jobgroup"] == testJobA["jobgroup"], \
            "Error: Jobgroup parameter is incorrect."
        assert testJobADoc["workflow"] == testJobA["workflow"], \
            "Error: Workflow parameter is incorrect."
        assert testJobADoc["task"] == testJobA["task"], \
            "Error: Task parameter is incorrect."
        assert testJobADoc["owner"] == testJobA["owner"], \
            "Error: Owner parameter is incorrect."

        assert testJobADoc["mask"]["FirstEvent"] == testJobA["mask"]["FirstEvent"], \
            "Error: First event in mask is incorrect."
        assert testJobADoc["mask"]["LastEvent"] == testJobA["mask"]["LastEvent"], \
            "Error: Last event in mask is incorrect."
        assert testJobADoc["mask"]["FirstLumi"] == testJobA["mask"]["FirstLumi"], \
            "Error: First lumi in mask is incorrect."
        assert testJobADoc["mask"]["LastLumi"] == testJobA["mask"]["LastLumi"], \
            "Error: First lumi in mask is incorrect."
        assert testJobADoc["mask"]["FirstRun"] == testJobA["mask"]["FirstRun"], \
            "Error: First run in mask is incorrect."
        assert testJobADoc["mask"]["LastEvent"] == testJobA["mask"]["LastRun"], \
            "Error: First event in mask is incorrect."

        assert len(testJobADoc["inputfiles"]) == 1, \
            "Error: Input files parameter is incorrect."

        testJobBDoc = change.jobsdatabase.document(testJobB["couch_record"])

        assert testJobBDoc["jobid"] == testJobB["id"], \
            "Error: ID parameter is incorrect."
        assert testJobBDoc["name"] == testJobB["name"], \
            "Error: Name parameter is incorrect."
        assert testJobBDoc["jobgroup"] == testJobB["jobgroup"], \
            "Error: Jobgroup parameter is incorrect."

        assert testJobBDoc["mask"]["FirstEvent"] == testJobB["mask"]["FirstEvent"], \
            "Error: First event in mask is incorrect."
        assert testJobBDoc["mask"]["LastEvent"] == testJobB["mask"]["LastEvent"], \
            "Error: Last event in mask is incorrect."
        assert testJobBDoc["mask"]["FirstLumi"] == testJobB["mask"]["FirstLumi"], \
            "Error: First lumi in mask is incorrect."
        assert testJobBDoc["mask"]["LastLumi"] == testJobB["mask"]["LastLumi"], \
            "Error: First lumi in mask is incorrect."
        assert testJobBDoc["mask"]["FirstRun"] == testJobB["mask"]["FirstRun"], \
            "Error: First run in mask is incorrect."
        assert testJobBDoc["mask"]["LastEvent"] == testJobB["mask"]["LastRun"], \
            "Error: First event in mask is incorrect."

        assert len(testJobBDoc["inputfiles"]) == 1, \
            "Error: Input files parameter is incorrect."

        changeStateDB = self.couchServer.connectDatabase(
            dbname="changestate_t/jobs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 3,
                         "Error: Wrong number of documents.")

        couchJobDoc = changeStateDB.document("1")

        assert couchJobDoc["name"] == testJobA["name"], \
            "Error: Name is wrong"
        assert len(couchJobDoc["inputfiles"]) == 1, \
            "Error: Wrong number of input files."

        result = changeStateDB.loadView("JobDump", "jobsByWorkflowName")

        self.assertEqual(len(result["rows"]), 2,
                         "Error: Wrong number of rows.")
        for row in result["rows"]:
            couchJobDoc = changeStateDB.document(row["value"]["id"])
            self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"],
                             "Error: Rev is wrong.")

        return
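
Reading a propagated job back out of couch follows the same pattern the assertions above use; a minimal sketch, assuming `change` is the ChangeState instance from this test and `job` has its couch_record set.

jobDoc = change.jobsdatabase.document(job["couch_record"])
for transition in jobDoc["states"].itervalues():
    # Each recorded state transition carries an integer timestamp.
    assert isinstance(transition["timestamp"], (int, long))
assert jobDoc["name"] == job["name"]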
Ejemplo n.º 59
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", pnn = "T2_CH_CERN")

        self.multipleFileFileset = Fileset(name = "TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(i, *[45+i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name = "TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        self.multipleFileLumiset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(1, *[45+i/3]))
            newFile.create()
            self.multipleFileLumiset.addFile(newFile)
        self.multipleFileLumiset.commit()

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        self.singleLumiFileset.create()
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"]))
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleLumiFileset.addFile(newFile)
        self.singleLumiFileset.commit()


        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test")
        testWorkflow.create()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleLumiSubscription.create()
        self.singleLumiSubscription.create()
        return
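
A minimal sketch of driving one of these subscriptions through the splitter, to be run inside one of this class's test methods (hence `self.`). It assumes the FixedDelay algorithm takes a trigger_time argument, an epoch timestamp after which the delay is considered elapsed.

import time

from WMCore.JobSplitting.SplitterFactory import SplitterFactory

splitter = SplitterFactory()
jobFactory = splitter(package="WMCore.WMBS",
                      subscription=self.singleFileSubscription)
jobGroups = jobFactory(trigger_time=int(time.time()) - 10)  # delay already passed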
Ejemplo n.º 60
0
    def createJobs(self):
        """
        _createJobs_

        Create test jobs in WMBS and BossAir
        """
        testWorkflow = Workflow(spec=makeUUID(),
                                owner="tapas",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testFilesetA = Fileset(name="TestFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name="TestFilesetB")
        testFilesetB.create()
        testFilesetC = Fileset(name="TestFilesetC")
        testFilesetC.create()

        testFileA = File(lfn="testFileA",
                         locations=set(["testSE1", "testSE2"]))
        testFileA.create()
        testFilesetA.addFile(testFileA)
        testFilesetA.commit()
        testFilesetB.addFile(testFileA)
        testFilesetB.commit()
        testFilesetC.addFile(testFileA)
        testFilesetC.commit()

        testSubscriptionA = Subscription(fileset=testFilesetA,
                                         workflow=testWorkflow,
                                         type="Processing")
        testSubscriptionA.create()
        testSubscriptionA.addWhiteBlackList([{
            "site_name": "testSite1",
            "valid": True
        }])
        testSubscriptionB = Subscription(fileset=testFilesetB,
                                         workflow=testWorkflow,
                                         type="Processing")
        testSubscriptionB.create()
        testSubscriptionB.addWhiteBlackList([{
            "site_name": "testSite1",
            "valid": False
        }])
        testSubscriptionC = Subscription(fileset=testFilesetC,
                                         workflow=testWorkflow,
                                         type="Merge")
        testSubscriptionC.create()

        testJobGroupA = JobGroup(subscription=testSubscriptionA)
        testJobGroupA.create()
        testJobGroupB = JobGroup(subscription=testSubscriptionB)
        testJobGroupB.create()
        testJobGroupC = JobGroup(subscription=testSubscriptionC)
        testJobGroupC.create()

        # Site 1, Has been assigned a location and is complete.
        testJobA = Job(name="testJobA", files=[testFileA])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group=testJobGroupA)
        testJobA["state"] = "success"

        # Site 1, Has been assigned a location and is incomplete.
        testJobB = Job(name="testJobB", files=[testFileA])
        testJobB["couch_record"] = makeUUID()
        testJobB["cache_dir"] = self.tempDir
        testJobB.create(group=testJobGroupA)
        testJobB["state"] = "executing"
        runJobB = RunJob()
        runJobB.buildFromJob(testJobB)
        runJobB["status"] = "PEND"

        # Does not have a location, white listed to site 1
        testJobC = Job(name="testJobC", files=[testFileA])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group=testJobGroupA)
        testJobC["state"] = "new"

        # Site 1, Has been assigned a location and is complete.
        testJobD = Job(name="testJobD", files=[testFileA])
        testJobD["couch_record"] = makeUUID()
        testJobD.create(group=testJobGroupB)
        testJobD["state"] = "success"

        # Site 2, Has been assigned a location and is incomplete.
        testJobE = Job(name="testJobE", files=[testFileA])
        testJobE["couch_record"] = makeUUID()
        testJobE.create(group=testJobGroupB)
        testJobE["state"] = "executing"
        runJobE = RunJob()
        runJobE.buildFromJob(testJobE)
        runJobE["status"] = "RUN"

        # Does not have a location, site 1 is blacklisted.
        testJobF = Job(name="testJobF", files=[testFileA])
        testJobF["couch_record"] = makeUUID()
        testJobF.create(group=testJobGroupB)
        testJobF["state"] = "new"

        # Site 1, Has been assigned a location and is in cleanout.
        testJobG = Job(name="testJobG", files=[testFileA])
        testJobG["couch_record"] = makeUUID()
        testJobG.create(group=testJobGroupC)
        testJobG["state"] = "cleanout"

        # Site 1, Has been assigned a location and is incomplete.
        testJobH = Job(name="testJobH", files=[testFileA])
        testJobH["couch_record"] = makeUUID()
        testJobH.create(group=testJobGroupC)
        testJobH["state"] = "new"

        # Does not have a location.
        testJobI = Job(name="testJobI", files=[testFileA])
        testJobI["couch_record"] = makeUUID()
        testJobI.create(group=testJobGroupC)
        testJobI["state"] = "new"

        # Does not have a location and is in cleanout.
        testJobJ = Job(name="testJobJ", files=[testFileA])
        testJobJ["couch_record"] = makeUUID()
        testJobJ.create(group=testJobGroupC)
        testJobJ["state"] = "cleanout"

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        changeStateAction.execute(jobs=[
            testJobA, testJobB, testJobC, testJobD, testJobE, testJobF,
            testJobG, testJobH, testJobI, testJobJ
        ])

        self.insertRunJob.execute([runJobB, runJobE])

        setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
        setLocationAction.execute(testJobA["id"], "testSite1")
        setLocationAction.execute(testJobB["id"], "testSite1")
        setLocationAction.execute(testJobD["id"], "testSite1")
        setLocationAction.execute(testJobE["id"], "testSite2")
        setLocationAction.execute(testJobG["id"], "testSite1")
        setLocationAction.execute(testJobH["id"], "testSite1")

        return
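
The BossAir bookkeeping used above reduces to a small pattern; a minimal sketch, assuming `job` is a WMBS job that has already been created and moved to 'executing', and that RunJob lives at its usual WMCore import path.

from WMCore.BossAir.RunJob import RunJob

runJob = RunJob()
runJob.buildFromJob(job)   # copy the WMBS job attributes into the BossAir record
runJob["status"] = "PEND"  # scheduler-level status, separate from the WMBS state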