Example #1
    def testCreateDeleteExistsNoFiles(self):
        """
        _testCreateDeleteExistsNoFiles_

        Create and then delete a job but don't add any input files to it.
        Use the job class's exists() method to determine if the job has been
        written to the database before it is created, after it has been created
        and after it has been deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job(name="TestJob")

        assert testJob.exists() == False, "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() is not False, "ERROR: Job does not exist after it was created"

        testJob.delete()

        assert testJob.exists() == False, "ERROR: Job exists after it was deleted"

        return
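
A note on the exists() checks above: in these examples, exists() conventionally returns the database id of the object, or False when no row is present. Because False == 0 in Python, a comparison like exists() >= 0 is always true and asserts nothing. A minimal sketch of the safer pattern, assuming that id-or-False convention (illustrative, not part of the original test):

    # Assuming exists() returns the row id (an int) or False
    jobId = testJob.exists()
    assert jobId is not False, "ERROR: Job was not created"
    # Truthiness also works, provided row ids start at 1
    assert testJob.exists(), "ERROR: Job was not created"
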
Example #2
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Create a JobGroup and then delete it.  Use the JobGroup's exists()
        method to determine if it exists before it is created, after it is
        created and after it is deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        self.assertFalse(testJobGroup.exists())

        testJobGroup.create()

        self.assertTrue(testJobGroup.exists())

        testJobGroup.delete()

        self.assertFalse(testJobGroup.exists())

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
Example #3
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = StoreResultsWorkloadFactory.getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})

        factory = StoreResultsWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                           arguments)

        testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        return
Example #4
    def createSingleJobWorkflow(self):
        """
        Create a workflow with one job and two files and store the results in instance variables
        """

        self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        self.testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        self.testFileA.addRun(Run(1, *[45]))
        self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        self.testFileB.addRun(Run(1, *[46]))
        self.testFileA.create()
        self.testFileB.create()

        self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB])

        self.testJob.create(group=testJobGroup)
        self.testJob.associateFiles()
Example #5
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testJobA = Job(name = makeUUID(), files = [])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name = makeUUID(), files = [])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group = testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name = makeUUID(), files = [])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroup)        
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."                

        return
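
The DAO calls in this test follow a pattern that recurs throughout these examples: a DAOFactory bound to the WMCore.WMBS package returns an executable action by class name. A minimal sketch of that setup, assuming the thread-local logger and dbinterface the tests attach in setUp:

    import threading

    from WMCore.DAOFactory import DAOFactory

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    # Each classname maps to a DAO module under the package
    runningJobsAction = daoFactory(classname="Monitoring.ListRunningJobs")
    runningJobs = runningJobsAction.execute()
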
Example #6
    def testCreateTransaction(self):
        """
        _testCreateTransaction_

        Create a JobGroup and commit it to the database.  Rollback the database
        transaction and verify that the JobGroup is no longer in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testFileset = WMBSFileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)

        assert testJobGroup.exists() == False, "ERROR: Job group exists before it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJobGroup.create()

        assert testJobGroup.exists() is not False, "ERROR: Job group does not exist after it was created"

        myThread.transaction.rollback()

        assert testJobGroup.exists() == False, "ERROR: Job group exists after transaction was rolled back."

        testSubscription.delete()
        testFileset.delete()
        testWorkflow.delete()
        return
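
The rollback check above relies on WMBS keeping a transaction object on the current thread. A minimal sketch of the begin/commit/rollback pattern used here and in the other transactional tests, assuming the same thread-local setup:

    import threading

    myThread = threading.currentThread()
    myThread.transaction.begin()
    try:
        testJobGroup.create()            # work done inside the transaction
        myThread.transaction.commit()
    except Exception:
        myThread.transaction.rollback()  # undoes the create() above
        raise
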
Example #7
    def createWorkflow(self, task):
        """
        Register job into WMBS for each task through Workflows

        """

        specURL = self.getWorkflowURL(task)

        fileSet = Fileset(name=self.getFilesetName(task), is_open=True)
        fileSet.create()

        taskFlow = Workflow(spec=specURL, owner=self.owner, dn=self.owner_dn,
                            name=self.getWorkflowName(task), task=task.name())
        taskFlow.create()

        self.workflowDict[task.name()] = taskFlow

        # Insert workflow into task
        setattr(task.data.input.WMBS, 'WorkflowSpecURL', specURL)

        # If the job is a merge job
        # Find the task it merges from
        # Then find the workflow for that task and assign it an output
        if hasattr(task.inputReference(), 'outputModule'):
            dummyStepName = task.inputReference().inputStep.split('/')[-1]
            taskName = task.inputReference().inputStep.split('/')[-2]
            outputModule = task.inputReference().outputModule
            if taskName not in self.workflowDict:
                raise Exception('Asked to chain output for task %s, which does not yet exist' % taskName)
            outputWorkflow = self.workflowDict[taskName]
            outputWorkflow.addOutput(outputModule, fileSet)

        logging.info('Registered workflow for step %s' % (task.name()))

        return taskFlow, fileSet
Example #8
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
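
A short usage sketch for the helper above; the subscription type is the only knob, and "Merge" is the default:

    testJob = createTestJob(subscriptionType="Processing")
    print(testJob["location"])    # "test.site.ch", set by the helper
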
Example #9
    def testDifferentSubscriptionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Make sure that the merge splitting still runs if the subscription ID
        is not equal to the workflow ID.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        dummyWorkflow = Workflow(name = "dummyWorkflow", spec = "bunk49",
                                 owner = "Steve", task="Test2")
        dummyWorkflow.create()
        dummyFileset = Fileset(name = "dummyFileset")
        dummyFileset.create()
        dummySubscription1 = Subscription(fileset = dummyFileset,
                                          workflow = dummyWorkflow,
                                          split_algo = "ParentlessMergeBySize")
        dummySubscription2 = Subscription(fileset = dummyFileset,
                                          workflow = dummyWorkflow,
                                          split_algo = "ParentlessMergeBySize")
        dummySubscription1.create()
        dummySubscription2.create()
        myThread.transaction.commit()

        self.stuffWMBS()
        splitter = SplitterFactory()
        jobFactory = splitter(package = "WMCore.WMBS",
                              subscription = self.mergeSubscription)
        result = jobFactory(min_merge_size = 4097, max_merge_size = 99999999,
                            max_merge_events = 999999999, merge_across_runs = False)
        self.assertEqual(len(result), 1)
        jobGroup = result[0]
        self.assertEqual(len(jobGroup.jobs), 2)
        return
Example #10
    def atestD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """

        # NOTE: the early return below intentionally disables this timing test;
        # remove it (along with the 'a' prefix on the method name) to run it.
        return

        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        jobList = self.createGiantJobSet(name=name, config=config, nSubs=10, nJobs=1000, nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds" % (stopTime - startTime))
Example #11
    def createTestSubscription(self, nFiles, nSites=1, closeFileset=False):
        """
        _createTestSubscription_
        
        Create a set of test subscriptions for testing purposes.
        """

        if nSites > self.nSites:
            nSites = self.nSites

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        # Create a testWorkflow
        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
        testWorkflow.create()

        # Create the files for each site
        for s in range(nSites):
            for i in range(nFiles):
                newFile = File(makeUUID(), size=1024, events=100, locations=set(["site%i.cern.ch" % s]))
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, split_algo="MinFileBased", type="Processing"
        )
        testSubscription.create()

        # Close the fileset
        if closeFileset:
            testFileset.markOpen(isOpen=False)

        return testSubscription
Example #12
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile, i, lumisPerFile, "somese.cern.ch")
            newFile.create()
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile, i, lumisPerFile, "otherse.cern.ch")
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing"
        )
        testSubscription.create()

        return testSubscription
Example #13
    def setupRepackWorkflow(self):
        """
        _setupRepackWorkflow_

        Populate WMBS with a repack-like workflow;
        every subscription must initially be unfinished.
        """

        workflowName = 'Repack_Run481516_StreamZ'
        mergeTasks = ['RepackMergewrite_QuadElectron_RAW', 'RepackMergewrite_TriPhoton_RAW',
                      'RepackMergewrite_SingleNeutrino_RAW']

        self.stateMap = {'Merge': [],
                         'Processing Done': []}
        self.orderedStates = ['Merge', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        # Create a wmspec on disk
        workload = newWorkload(workflowName)
        repackTask = workload.newTask('Repack')
        for task in mergeTasks:
            repackTask.addTask(task)
        repackTask.addTask('RepackCleanupUnmergedwrite_QuadElectron_RAW')

        specPath = os.path.join(self.testDir, 'Repack.pkl')
        workload.save(specPath)

        # Populate WMBS
        topFileset = Fileset(name='TestStreamerFileset')
        topFileset.create()

        options = {'spec': specPath, 'owner': 'ItsAMeMario',
                   'name': workflowName, 'wfType': 'tier0'}
        topLevelWorkflow = Workflow(task='/%s/Repack' % workflowName,
                                    **options)
        topLevelWorkflow.create()
        topLevelSub = Subscription(topFileset, topLevelWorkflow)
        topLevelSub.create()
        self.stateMap['Merge'].append(topFileset)
        for task in mergeTasks:
            mergeWorkflow = Workflow(task='/%s/Repack/%s' % (workflowName, task), **options)
            mergeWorkflow.create()
            unmergedFileset = Fileset(name='TestUnmergedFileset%s' % task)
            unmergedFileset.create()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.create()
            self.stateMap['Processing Done'].append(unmergedFileset)
        cleanupWorkflow = Workflow(task='/%s/Repack/RepackCleanupUnmergedwrite_QuadElectron_RAW' % workflowName,
                                   **options)
        cleanupWorkflow.create()
        unmergedFileset = Fileset(name='TestUnmergedFilesetToCleanup')
        unmergedFileset.create()
        cleanupSub = Subscription(unmergedFileset, cleanupWorkflow)
        cleanupSub.create()

        return
Example #14
    def testParentageByJob(self):
        """
        _testParentageByJob_
        
        Tests the DAO that assigns parentage by Job
        """

        testWorkflow = Workflow(spec = 'hello', owner = "mnorman",
                                name = "wf001", task="basicWorkload/Production")
        testWorkflow.create()
        testFileset = Fileset(name = "TestFileset")
        testFileset.create()
        testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased")
        testSubscription.create()
        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentA.addRun(Run( 1, *[45]))
        testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024,
                              events = 20, checksums = {'cksum': 1})
        testFileParentB.addRun(Run( 1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10,
                         checksums = {'cksum':1})
        testFileA.addRun(Run( 1, *[45]))
        testFileA.create()

        testJobA = Job()
        testJobA.create(group = testJobGroup)
        testJobA.addFile(testFileParentA)
        testJobA.addFile(testFileParentB)
        testJobA.associateFiles()


        parentAction = self.daofactory(classname = "Files.SetParentageByJob")
        parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']})


        testFileB = File(id = testFileA["id"])
        testFileB.loadData(parentage = 1)

        goldenFiles = [testFileParentA, testFileParentB]
        for parentFile in testFileB["parents"]:
            self.assertEqual(parentFile in goldenFiles, True,
                   "ERROR: Unknown parent file")
            goldenFiles.remove(parentFile)

        self.assertEqual(len(goldenFiles), 0,
                         "ERROR: Some parents are missing")
Example #15
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None, wfPrio=1, changeState=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        changeState is an instance of the ChangeState class, used to apply job status changes
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=wfPrio)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        if changeState:
            for group in jobGroupList:
                changeState.propagate(group.jobs, 'created', 'new')

        return jobGroupList
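
A short usage sketch for the changeState hook above, assuming a ChangeState instance built from the component config as in the WMCore tests (the config name is illustrative):

    from WMCore.JobStateMachine.ChangeState import ChangeState

    changer = ChangeState(config)   # config is assumed to exist in the test
    groups = self.createJobGroups(nSubs=1, nJobs=5, task=task,
                                  workloadSpec=spec, site="T2_CH_CERN",
                                  changeState=changer)
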
Example #16
    def testDeleteTransaction(self):
        """
        _testDeleteTransaction_

        Create a new job and commit it to the database.  Start a new transaction
        and delete the job from the database.  Verify that the job has been
        deleted.  After that, roll back the transaction and verify that the
        job is once again in the database.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])

        assert testJob.exists() is False, \
            "ERROR: Job exists before it was created"

        testJob.create(group=testJobGroup)

        assert testJob.exists() is not False, \
            "ERROR: Job does not exist after it was created"

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testJob.delete()

        assert testJob.exists() is False, \
            "ERROR: Job exists after it was delete"

        myThread.transaction.rollback()

        assert testJob.exists() is not False, \
            "ERROR: Job does not exist after transaction was rolled back."

        return
Example #17
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=None, wl=None):
        """
        Creates a series of jobGroups for submissions

        """
        # Guard against the mutable-default-argument pitfall
        bl = bl or []
        wl = wl or []

        jobGroupList = []

        testWorkflow = Workflow(
            spec=workloadSpec,
            owner="tapas",
            name=makeUUID(),
            task="basicWorkload/Production",
            owner_vogroup="phgroup",
            owner_vorole="cmsrole",
        )
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #18
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Stuff WMBS with workflows
        """
        workflow = Workflow(spec = 'spec.xml', name = 'ReRecoTest_v0Emulator',
                            task = '/ReRecoTest_v0Emulator/Test', priority = 10)
        workflow.create()
        inputFileset = Fileset(name = 'TestFileset')
        inputFileset.create()
        subscription = Subscription(inputFileset, workflow)
        subscription.create()
Example #19
    def createTestJobs(self, nJobs, cacheDir):
        """
        _createTestJobs_

        Create several jobs
        """


        testWorkflow = Workflow(spec = "spec.xml", owner = "Simon",
                                name = "wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name = "TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset = testWMBSFileset,
                                        workflow = testWorkflow,
                                        type = "Processing",
                                        split_algo = "FileBased")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        # Create a file
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')
        testFileA.create()

        baseName = makeUUID()

        # Now create the jobs
        for i in range(nJobs):
            testJob = Job(name = '%s-%i' % (baseName, i))
            testJob.addFile(testFileA)
            testJob['location'] = 'malpaquet'
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob.create(testJobGroup)
            testJob.save()
            testJobGroup.add(testJob)

        testJobGroup.commit()

        # Set test job caches
        for job in testJobGroup.jobs:
            job.setCache(cacheDir)

        return testJobGroup
Example #20
    def testGetGroupsByJobStateDAO(self):
        """
        _testGetGroupsByJobStateDAO_

        Verify that the GetGroupsByJobState DAO does what it is supposed to do.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = WMBSFileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroupA = JobGroup(subscription=testSubscription)
        testJobGroupA.create()
        testJobGroupB = JobGroup(subscription=testSubscription)
        testJobGroupB.create()

        testJobA = Job(name="TestJobA")
        testJobB = Job(name="TestJobB")

        testJobGroupA.add(testJobA)
        testJobGroupB.add(testJobB)

        testJobGroupA.commit()
        testJobGroupB.commit()

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        stateChangeAction = daofactory(classname="Jobs.ChangeState")
        testJobA["state"] = "complete"
        testJobB["state"] = "executing"
        stateChangeAction.execute(jobs=[testJobA, testJobB])

        jobGroupAction = daofactory(classname="JobGroup.GetGroupsByJobState")
        jobGroups = jobGroupAction.execute(jobState="complete")

        assert len(jobGroups) == 1, \
            "Error: Wrong number of job groups returned."
        assert jobGroups[0] == testJobGroupA.id, \
            "Error: Wrong job group returned."

        return
Example #21
    def createLargerTestJobGroup(self, commitFlag=True):
        """
        _createLargerTestJobGroup_

        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = WMBSFileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation("T2_CH_CERN")
        testFileC.setLocation("malpaquet")

        testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10)
        testFileD.addRun(Run(10, *[12312]))
        testFileD.setLocation("T2_CH_CERN")
        testFileD.setLocation("malpaquet")

        testFileC.create()
        testFileD.create()

        testJobA = Job(name="TestJobA1")
        testJobA.addFile(testFileC)

        testJobB = Job(name="TestJobB1")
        testJobB.addFile(testFileD)

        testJobGroup.add(testJobA)
        testJobGroup.add(testJobB)

        for i in range(0, 100):
            testJob = Job(name="TestJob%i" % (i))
            testJob.addFile(testFileC)
            testJobGroup.add(testJob)

        if commitFlag:
            testJobGroup.commit()

        return testJobGroup
Example #22
    def notestCreateDeleteExists(self):
        """
        Create and then delete a job and workflow.  Use the workunit class's exists() method to
        determine if the workunit has been written to the database before the job is
        created, after the job has been created, and after the workflow has been deleted.
        """

        testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))

        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])
        testWU1 = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[45]))
        testWU2 = WorkUnit(taskID=testWorkflow.id, fileid=testFileB['id'], runLumi=Run(1, *[46]))

        self.assertFalse(testWU1.exists(), "WorkUnit exists before job was created")
        self.assertFalse(testWU2.exists(), "WorkUnit exists before job was created")

        testJob.create(group=testJobGroup)

        self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job was created")
        self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job was created")

        testJob.delete()

        self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job is deleted")
        self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job is deleted")

        testWorkflow.delete()

        self.assertFalse(testWU1.exists(), "WorkUnit exists after workflow is deleted")
        self.assertFalse(testWU2.exists(), "WorkUnit exists after workflow is deleted")

        return
Example #23
    def testCreateDeleteExists(self):
        """
        _testCreateDeleteExists_

        Create and then delete a job.  Use the job class's exists() method to
        determine if the job has been written to the database before it is
        created, after it has been created and after it has been deleted.
        """
        testWorkflow = Workflow(spec="spec.xml", owner="Simon",
                                name="wf001", task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])
        # testWU = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[44]))

        self.assertFalse(testJob.exists(), "Job exists before it was created")
        # self.assertFalse(testWU.exists(), "WorkUnit exists before it was created")

        testJob.create(group=testJobGroup)

        self.assertTrue(testJob.exists(), "Job does not exist after it was created")
        # self.assertFalse(testWU.exists(), "WorkUnit exists when there is no work")

        # Test the getWorkflow method
        workflow = testJob.getWorkflow()
        self.assertEqual(workflow['task'], 'Test')
        self.assertEqual(workflow['name'], 'wf001')

        testJob.delete()

        self.assertFalse(testJob.exists(), "Job exists after it was deleted")
        # self.assertFalse(testWU.exists(), "WorkUnit exists after job is deleted")

        return
Example #24
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=1)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #25
    def createDummyJobs(self, nJobs, location=None):
        """
        _createDummyJobs_

        Create some dummy jobs
        """

        if not location:
            location = self.sites[0]

        nameStr = makeUUID()

        testWorkflow = Workflow(
            spec=nameStr,
            owner="tapas",
            name=nameStr,
            task="basicWorkload/Production",
            owner_vogroup="phgroup",
            owner_vorole="cmsrole",
        )
        testWorkflow.create()

        testFileset = Fileset(name=nameStr)
        testFileset.create()

        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
        )
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        jobList = []

        for i in range(nJobs):
            testJob = Job(name="%s-%i" % (nameStr, i))
            testJob["location"] = location
            testJob["custom"]["location"] = location
            testJob["userdn"] = "tapas"
            testJob["owner"] = "tapas"
            testJob["userrole"] = "cmsrole"
            testJob["usergroup"] = "phgroup"

            testJob.create(testJobGroup)
            jobList.append(testJob)

        return jobList
Example #26
    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output fileset
        id.  Either the JobGroup id or uid must be specified for this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname = "JobGroup.LoadFromID")
            result = loadAction.execute(self.id, conn = self.getDBConn(),
                                        transaction = self.existingTransaction())
        else:
            loadAction = self.daofactory(classname = "JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid, conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        self.subscription = Subscription(id = result["subscription"])
        self.subscription.load()

        self.output = Fileset(id = result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return
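
A minimal usage sketch for load(); either the id or the uid must be set on the JobGroup beforehand, and the id value here is illustrative:

    jobGroup = JobGroup(id=42)    # or JobGroup(uid=someKnownUUID)
    jobGroup.load()               # fills id, uid, lastUpdate, subscription
                                  # and the output fileset
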
Example #27
    def setUp(self):
        """
        _setUp_

        Create a single subscription with one file.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = ["WMCore.WMBS"],
                                useDefault = False)
        
        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)
        
        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute(siteName = "site1", seName = "somese.cern.ch")
        locationAction.execute(siteName = "site2", seName = "otherse.cern.ch")
        
        self.testFileset = Fileset(name = "TestFileset1")
        self.testFileset.create()
        
        testWorkflow = Workflow(spec = "spec.xml", owner = "Steve",
                                name = "wf001", task="Test" )
        testWorkflow.create()
        self.testSubscription = Subscription(fileset = self.testFileset,
                                             workflow = testWorkflow,
                                             split_algo = "Periodic",
                                             type = "Processing")
        self.testSubscription.create()
        return
Example #28
    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        # overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(
            self.uid,
            self.subscription["id"],
            self.output.id,
            conn=self.getDBConn(),
            transaction=self.existingTransaction(),
        )

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return
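
A short usage sketch for create(), assuming a subscription that already exists in WMBS; note that the id is recovered via exists() after the insert:

    jobGroup = JobGroup(subscription=testSubscription)
    jobGroup.create()
    assert jobGroup.id == jobGroup.exists()
    assert jobGroup.output.exists()   # the output fileset was created too
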
Example #29
    def testAddChecksumsByLFN(self):
        """
        _testAddChecksumsByLFN_
        
        Test the Files.AddChecksumByLFN DAO.
        """

        testWorkflow = Workflow(spec="hello", owner="mnorman", name="wf001", task="basicWorkload/Production")
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()
        testSubscription = Subscription(
            fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
        )
        testSubscription.create()
        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[45]))
        testFileB.create()

        testJobA = Job()
        testJobA.create(group=testJobGroup)
        testJobA.associateFiles()

        parentAction = self.daofactory(classname="Files.AddChecksumByLFN")
        binds = [
            {"lfn": testFileA["lfn"], "cktype": "cksum", "cksum": 101},
            {"lfn": testFileA["lfn"], "cktype": "adler32", "cksum": 201},
            {"lfn": testFileB["lfn"], "cktype": "cksum", "cksum": 101},
        ]
        parentAction.execute(bulkList=binds)

        testFileC = File(id=testFileA["id"])
        testFileC.loadData()
        testFileD = File(id=testFileB["id"])
        testFileD.loadData()

        self.assertEqual(testFileC["checksums"], {"adler32": "201", "cksum": "101"})
        self.assertEqual(testFileD["checksums"], {"cksum": "101"})

        return
Example #30
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, rand = False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        testFileset.create()
        parentFile = File('%s_parent' % (baseName), size = 1000, events = 100,
                          locations = set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn = '%s_%i' % (baseName, i), size = 1000,
                           events = 100, locations = "somese.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn = '%s_%i_2' % (baseName, i), size = 1000,
                               events = 100, locations = "otherse.cern.ch")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "LumiBased",
                                         type = "Processing")
        testSubscription.create()

        return testSubscription
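
A hedged sketch of how such a LumiBased subscription is typically consumed, following the SplitterFactory pattern from Example #9; the lumis_per_job parameter belongs to the LumiBased algorithm and is assumed here:

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroups = jobFactory(lumis_per_job=2)   # returns a list of JobGroups
    for jobGroup in jobGroups:
        print(len(jobGroup.jobs))
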
Example #31
    def _checkTask(self, task, taskConf, centralConf):
        """
        _checkTask_

        Verify the correctness of the task

        """
        if taskConf.get("InputTask") is not None:
            inpTaskPath = task.getPathName()
            inpTaskPath = inpTaskPath.replace(task.name(), "")
            inpTaskPath += "cmsRun1"
            self.assertEqual(task.data.input.inputStep, inpTaskPath,
                             "Input step is wrong in the spec")
            self.assertTrue(
                taskConf["InputTask"] in inpTaskPath,
                "Input task is not in the path name for child task")

        if "MCPileup" in taskConf or "DataPileup" in taskConf:
            mcDataset = taskConf.get('MCPileup', None)
            dataDataset = taskConf.get('DataPileup', None)
            if mcDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset,
                                 [mcDataset])
            if dataDataset:
                self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset,
                                 [dataDataset])

        workflow = Workflow(name=self.workload.name(), task=task.getPathName())
        workflow.load()

        outputMods = outputModuleList(task)
        self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                         "Error: Wrong number of WF outputs")

        for outputModule in outputMods:
            filesets = workflow.outputMap[outputModule][0]
            merged = filesets['merged_output_fileset']
            unmerged = filesets['output_fileset']

            merged.loadData()
            unmerged.loadData()

            mergedset = (task.getPathName() + "/" + task.name() +
                         "Merge" + outputModule + "/merged-Merged")
            if outputModule == "logArchive" or not taskConf.get("KeepOutput", True) \
                or outputModule in taskConf.get("TransientOutputModules", []) or outputModule in centralConf.get("IgnoredOutputModules", []):
                mergedset = task.getPathName() + "/unmerged-" + outputModule
            unmergedset = task.getPathName() + "/unmerged-" + outputModule

            self.assertEqual(mergedset, merged.name,
                             "Merged fileset name is wrong")
            self.assertEqual(unmergedset, unmerged.name,
                             "Unmerged fileset name  is wrong")

            if outputModule != "logArchive" and taskConf.get("KeepOutput", True) \
                and outputModule not in taskConf.get("TransientOutputModules", []) \
                and outputModule not in centralConf.get("IgnoredOutputModules", []):
                mergeTask = (task.getPathName() + "/" + task.name() +
                             "Merge" + outputModule)

                mergeWorkflow = Workflow(name=self.workload.name(),
                                         task=mergeTask)
                mergeWorkflow.load()
                self.assertTrue(
                    "Merged" in mergeWorkflow.outputMap,
                    "Merge workflow does not contain a Merged output key")
                mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
                mergedFileset = mergedOutputMod['merged_output_fileset']
                unmergedFileset = mergedOutputMod['output_fileset']
                mergedFileset.loadData()
                unmergedFileset.loadData()
                self.assertEqual(mergedFileset.name, mergedset,
                                 "Merged fileset name in merge task is wrong")
                self.assertEqual(
                    unmergedFileset.name, mergedset,
                    "Unmerged fileset name in merge task is wrong")

                mrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                    'merged_output_fileset']
                umrgLogArch = mergeWorkflow.outputMap['logArchive'][0][
                    'output_fileset']
                mrgLogArch.loadData()
                umrgLogArch.loadData()

                archName = (task.getPathName() + "/" + task.name() +
                            "Merge" + outputModule + "/merged-logArchive")

                self.assertEqual(
                    mrgLogArch.name, archName,
                    "LogArchive merged fileset name is wrong in merge task")
                self.assertEqual(
                    umrgLogArch.name, archName,
                    "LogArchive unmerged fileset name is wrong in merge task")

            if outputModule != "logArchive":
                taskOutputMods = task.getOutputModulesForStep(
                    stepName="cmsRun1")
                currentModule = getattr(taskOutputMods, outputModule)
                if taskConf.get("PrimaryDataset") is not None:
                    self.assertEqual(currentModule.primaryDataset,
                                     taskConf["PrimaryDataset"],
                                     "Wrong primary dataset")
                processedDatasetParts = [
                    "AcquisitionEra", "ProcessingString", "ProcessingVersion"
                ]
                allParts = True
                for part in processedDatasetParts:
                    if part in taskConf:
                        self.assertTrue(part in currentModule.processedDataset,
                                        "Wrong processed dataset for module")
                    else:
                        allParts = False
                if allParts:
                    self.assertEqual(
                        currentModule.processedDataset,
                        "%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                       taskConf["ProcessingString"],
                                       taskConf["ProcessingVersion"]),
                        "Wrong processed dataset for module")

        # Test subscriptions
        if taskConf.get("InputTask") is None:
            inputFileset = "%s-%s-SomeBlock" % (self.workload.name(),
                                                task.name())
        elif "Merge" in task.getPathName().split("/")[-2]:
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "merged-Merged"
        else:
            inpTaskPath = task.getPathName().replace(task.name(), "")
            inputFileset = inpTaskPath + "unmerged-%s" % taskConf[
                "InputFromOutputModule"]
        taskFileset = Fileset(name=inputFileset)
        taskFileset.loadData()

        taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
        taskSubscription.loadData()

        if taskConf.get("InputTask") is None and taskConf.get(
                "InputDataset") is None:
            # Production type
            self.assertEqual(
                taskSubscription["type"], "Production",
                "Error: Wrong subscription type for processing task")
            self.assertEqual(taskSubscription["split_algo"],
                             taskConf["SplittingAlgo"],
                             "Error: Wrong split algo for generation task")
        else:
            # Processing type
            self.assertEqual(taskSubscription["type"], "Processing",
                             "Wrong subscription type for task")
            if taskSubscription["split_algo"] != "WMBSMergeBySize":
                self.assertEqual(taskSubscription["split_algo"],
                                 taskConf['SplittingAlgo'],
                                 "Splitting algo mismatch")
            else:
                self.assertEqual(
                    taskFileset.name, inpTaskPath +
                    "unmerged-%s" % taskConf["InputFromOutputModule"],
                    "Subscription uses WMBSMergeBySize on a merge fileset")

        return
Example #32
    def deleteEverything(self):
        """
        _deleteEverything_

        This function should delete the subscription, and absolutely everything else having anything
        to do with the subscription that is NOT in use by any other piece.  It should check for all
        the proper ownerships through a sequence of DAO calls that will take forever.

        Nothing except the taskArchiver should be calling this.
        """
        existingTransaction = self.beginTransaction()

        jobGroups = self.getAllJobGroups()
        filesets = []

        # The order here is important
        # You need to delete files and filesets from the bottom up
        # In order to not violate parentage

        # Get output filesets from jobGroups
        for jobGroupID in jobGroups:
            loadAction = self.daofactory(classname = "JobGroup.LoadFromID")
            result = loadAction.execute(jobGroupID, conn = self.getDBConn(),
                                        transaction = self.existingTransaction())
            filesets.append(result['output'])

        # Get output filesets from the workflow
        for entry in self['workflow'].outputMap:
            for outputFilesets in self['workflow'].outputMap[entry]:
                wid = outputFilesets["output_fileset"].id
                if wid not in filesets:
                    filesets.append(wid)

        # Do the input fileset LAST!
        filesets.append(self['fileset'].id)

        self.commitTransaction(existingTransaction)

        # First, jobs
        # If there are too many jobs, delete them in separate
        # transactions to reduce database load
        deleteAction = self.daofactory(classname = "Jobs.Delete")
        jobDeleteList = []
        for job in self.getJobs():
            jobDeleteList.append(job['id'])
        if len(jobDeleteList) > 0:
            if len(jobDeleteList) <= self.bulkDeleteLimit:
                existingTransaction = self.beginTransaction()
                deleteAction.execute(id = jobDeleteList, conn = self.getDBConn(),
                                     transaction = self.existingTransaction())
                self.commitTransaction(existingTransaction)
            else:
                while len(jobDeleteList) > 0:
                    existingTransaction = self.beginTransaction()
                    toDelete = jobDeleteList[:self.bulkDeleteLimit]
                    jobDeleteList = jobDeleteList[self.bulkDeleteLimit:]
                    deleteAction.execute(id = toDelete, conn = self.getDBConn(),
                                         transaction = self.existingTransaction())

                    self.commitTransaction(existingTransaction)

        # Next jobGroups
        deleteAction = self.daofactory(classname = "JobGroup.Delete")
        existingTransaction = self.beginTransaction()
        for jobGroupID in jobGroups:
            deleteAction.execute(id = jobGroupID, conn = self.getDBConn(),
                                 transaction = self.existingTransaction())
        self.commitTransaction(existingTransaction)

        # Now, get the filesets that need to be deleted
        action  = self.daofactory(classname = "Fileset.CheckForDelete")
        existingTransaction = self.beginTransaction()
        results = action.execute(fileids = filesets,
                                 subid = self['id'],
                                 conn = self.getDBConn(),
                                 transaction = self.existingTransaction())

        self.commitTransaction(existingTransaction)

        deleteFilesets = [x['id'] for x in results]

        # Delete files in sets
        # Each set of files deleted in a separate transaction
        for filesetID in deleteFilesets:
            fileset = Fileset(id = filesetID)

            # Load the files
            filesetFiles = []
            action  = self.daofactory(classname = "Files.InFileset")
            results = action.execute(fileset = filesetID,
                                     conn = self.getDBConn(),
                                     transaction = self.existingTransaction())

            for result in results:
                filesetFiles.append(result['fileid'])

            # Skip filesets with no files; nothing to delete here
            if len(filesetFiles) < 1:
                continue

            parent = self.daofactory(classname = "Files.DeleteParentCheck")
            action = self.daofactory(classname = "Files.DeleteCheck")

            if len(filesetFiles) <= self.bulkDeleteLimit:
                existingTransaction = self.beginTransaction()
                parent.execute(file = filesetFiles, fileset = fileset.id,
                               conn = self.getDBConn(),
                               transaction = self.existingTransaction())
                action.execute(file = filesetFiles, fileset = fileset.id,
                               conn = self.getDBConn(),
                               transaction = self.existingTransaction())
                self.commitTransaction(existingTransaction)
            else:
                while len(filesetFiles) > 0:
                    existingTransaction = self.beginTransaction()
                    toDelete     = filesetFiles[:self.bulkDeleteLimit]
                    filesetFiles = filesetFiles[self.bulkDeleteLimit:]
                    parent.execute(file = toDelete, fileset = fileset.id,
                                   conn = self.getDBConn(),
                                   transaction = self.existingTransaction())
                    action.execute(file = toDelete, fileset = fileset.id,
                                   conn = self.getDBConn(),
                                   transaction = self.existingTransaction())
                    self.commitTransaction(existingTransaction)

        # Start a new transaction for filesets, workflow, and the subscription
        existingTransaction = self.beginTransaction()
        action = self.daofactory(classname = "Fileset.DeleteCheck")
        for filesetID in deleteFilesets:
            # Now actually delete the filesets.  Note: don't rebind the
            # deleteFilesets list being iterated over; the DAO's return
            # value isn't needed here.
            action.execute(fileid = filesetID,
                           subid = self['id'],
                           conn = self.getDBConn(),
                           transaction = self.existingTransaction())

        # Next, the workflow
        action = self.daofactory(classname = "Workflow.DeleteCheck")
        action.execute(workid = self["workflow"].id, subid = self["id"],
                       conn = self.getDBConn(),
                       transaction = self.existingTransaction())

        self.delete()
        self.commitTransaction(existingTransaction)
        return
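
The two bulk-delete branches above (jobs here, files further down) repeat
the same slice-and-commit pattern to keep each transaction under
bulkDeleteLimit. A generic chunking helper captures it; a sketch, with
`chunked` being our name rather than a WMCore API:

    def chunked(items, size):
        """Yield successive slices of at most `size` elements."""
        for start in range(0, len(items), size):
            yield items[start:start + size]

    # Usage against the job-deletion pattern above:
    # for toDelete in chunked(jobDeleteList, self.bulkDeleteLimit):
    #     existingTransaction = self.beginTransaction()
    #     deleteAction.execute(id=toDelete, conn=self.getDBConn(),
    #                          transaction=self.existingTransaction())
    #     self.commitTransaction(existingTransaction)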
Example #33
    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", pnn="somese.cern.ch")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL,
                                 owner="mnorman",
                                 name=name,
                                 task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        dummySubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations={"somese.cern.ch"})
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations={"somese.cern.ch"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations={"somese.cern.ch"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV",
                      size=1024 * 1000000,
                      events=1024,
                      first_event=3072,
                      locations={"somese.cern.ch"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for fileObj in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV
        ]:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return
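
The eleven File blocks above differ only in LFN, run/lumi pair, first
event and (for fileIV) size, so the fixture can be built from a table. A
behavior-preserving sketch using the same WMBS File and Run classes:

    fileSpecs = [
        # (lfn, run, lumi, firstEvent, size)
        ("file1", 1, 45, 0, 1024), ("file2", 1, 45, 1024, 1024),
        ("file3", 1, 45, 2048, 1024), ("file4", 1, 45, 3072, 1024),
        ("fileA", 1, 46, 0, 1024), ("fileB", 1, 46, 1024, 1024),
        ("fileC", 1, 46, 2048, 1024),
        ("fileI", 2, 46, 0, 1024), ("fileII", 2, 46, 1024, 1024),
        ("fileIII", 2, 46, 2048, 1024),
        ("fileIV", 2, 46, 3072, 1024 * 1000000),
    ]
    for lfn, run, lumi, firstEvent, size in fileSpecs:
        fileObj = File(lfn=lfn, size=size, events=1024,
                       first_event=firstEvent,
                       locations={"somese.cern.ch"})
        fileObj.addRun(Run(run, lumi))
        fileObj.create()
        mergeFileset.addFile(fileObj)
        bogusFileset.addFile(fileObj)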
Example #34
    def testJobSerialization(self):
        """
        _testJobSerialization_

        Verify that serialization of a job works when adding a FWJR.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                                name="wf001", task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
               "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"

        change.propagate([testJobA], 'created', 'new')
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)
        testJobA["fwjr"] = myReport

        change.propagate([testJobA], 'executing', 'created')

        changeStateDB = self.couchServer.connectDatabase(dbname="changestate_t/fwjrs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        result = changeStateDB.loadView("FWJRDump", "fwjrsByWorkflowName")
        self.assertEqual(len(result["rows"]), 1,
                         "Error: Wrong number of rows.")
        for row in result["rows"]:
            couchJobDoc = changeStateDB.document(row["value"]["id"])
            self.assertEqual(couchJobDoc["_rev"], row["value"]["rev"],
                             "Error: Rev is wrong.")

        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/FWJRDump":
                fwjrDoc = changeStateDB.document(resultRow["id"])
                break

        assert fwjrDoc["retrycount"] == 0, \
               "Error: Retry count is wrong."

        assert len(fwjrDoc["fwjr"]["steps"].keys()) == 2, \
               "Error: Wrong number of steps in FWJR."
        assert "cmsRun1" in fwjrDoc["fwjr"]["steps"].keys(), \
               "Error: cmsRun1 step is missing from FWJR."
        assert "stageOut1" in fwjrDoc["fwjr"]["steps"].keys(), \
               "Error: stageOut1 step is missing from FWJR."

        return
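
The `_all_docs` scan above breaks out of the loop on the first
non-design document. The same selection reads more directly as a
comprehension over the row structure already used in this test:

    # Collect every FWJR document id, skipping CouchDB design documents.
    fwjrIds = [row["id"] for row in allDocs["rows"]
               if not row["id"].startswith("_design/")]
    # fwjrDoc = changeStateDB.document(fwjrIds[0])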
Example #35
    def testPersist(self):
        """
        _testPersist_

        This is the test class for function Propagate from module ChangeState
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                                name="wf001", task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(4):
            newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"]))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 4, \
               "Error: Splitting should have created four jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobC = jobGroup.jobs[2]
        testJobC["user"] = "******"
        testJobC["group"] = "DMWM"
        testJobC["taskType"] = "Processing"
        testJobD = jobGroup.jobs[3]
        testJobD["user"] = "******"
        testJobD["group"] = "DMWM"
        testJobD["taskType"] = "Processing"

        change.persist([testJobA, testJobB], "created", "new")
        change.persist([testJobC, testJobD], "new", "none")

        stateDAO = self.daoFactory(classname="Jobs.GetState")

        jobAState = stateDAO.execute(id=testJobA["id"])
        jobBState = stateDAO.execute(id=testJobB["id"])
        jobCState = stateDAO.execute(id=testJobC["id"])
        jobDState = stateDAO.execute(id=testJobD["id"])

        assert jobAState == "created" and jobBState == "created" and \
               jobCState == "new" and jobDState == "new", \
               "Error: Jobs didn't change state correctly."

        return
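
The four per-job blocks above assign identical user/group/taskType
values. A sketch of the same fixture as a loop (values as above; the
user field is redacted in the source):

    testJobs = jobGroup.jobs[:4]
    for job in testJobs:
        job["user"] = "******"  # redacted in the source
        job["group"] = "DMWM"
        job["taskType"] = "Processing"
    testJobA, testJobB, testJobC, testJobD = testJobs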
Example #36
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   pnn='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscriptions for multiple top-level tasks (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper._createSubscriptionsInWMBS(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
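
The four skim-output assertions above repeat one load-and-compare
pattern. Driving them from a table over the same outputMap structure
keeps the expectations in one place; a sketch:

    skimBase = "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-%s"
    for module in ("SkimOutputA", "SkimOutputB"):
        for key in ("merged_output_fileset", "output_fileset"):
            fileset = skimWorkflow.outputMap[module][0][key]
            fileset.loadData()
            self.assertEqual(fileset.name, skimBase % module,
                             "Error: %s/%s fileset is wrong." % (module, key))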
Example #37
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   pnn='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testTopLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
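
The site white/black-list loop earlier in this test special-cases
"site1" with an if/else. Phrasing it as an expected mapping also fails
loudly if an unexpected site ever shows up; a sketch against the same
getWhiteBlackList() rows:

    expectedValid = {"site1": 1, "site2": 0}  # white listed / black listed
    for site in procSubscription.getWhiteBlackList():
        self.assertEqual(site["valid"], expectedValid[site["site_name"]],
                         "Error: Wrong white/black list entry for %s." %
                         site["site_name"])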
Example #38
    def setUp(self):
        """
        _setUp_

        """
        import WMQuality.TestInit
        WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn) 
                                    VALUES (1, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 1)
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN="SomePNN",
                                  binds={
                                      'RUN': 1,
                                      'P5_ID': 1,
                                      'LUMI': 1,
                                      'STREAM': "Express",
                                      'TIME': int(time.time()),
                                      'LFN': "/streamer",
                                      'FILESIZE': 0,
                                      'EVENTS': 0
                                  },
                                  transaction=False)

        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute(
            {
                'RUN': 1,
                'STREAM': "Express",
                'NUM_PRODUCER': 1
            },
            transaction=False)

        self.completeFilesDAO = wmbsDaoFactory(
            classname="Subscriptions.CompleteFiles")
        self.markPromptCalibrationFinishedDAO = daoFactory(
            classname="ConditionUpload.MarkPromptCalibrationFinished")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Condition",
                                          type="Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        alcaRecoFile = File("/alcareco", size=0, events=0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomePNN", immediateSave=False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size=0, events=0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomePNN", immediateSave=False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size=0, events=0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details
                                              """,
                                           transaction=False)[0].fetchall()

        setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage")
        setParentageDAO.execute(binds=[{
            'parent': "/streamer",
            'child': "/alcareco"
        }, {
            'parent': "/alcareco",
            'child': "/alcaprompt"
        }, {
            'parent': "/alcaprompt",
            'child': "/sqlite"
        }],
                                transaction=False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return
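
The parentage binds above encode a linear chain, streamer -> alcareco ->
alcaprompt -> sqlite. Pairing neighbours with zip builds the same list
and makes the chain explicit; a sketch:

    chain = ["/streamer", "/alcareco", "/alcaprompt", "/sqlite"]
    parentageBinds = [{'parent': parent, 'child': child}
                      for parent, child in zip(chain, chain[1:])]
    # setParentageDAO.execute(binds=parentageBinds, transaction=False)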
Example #39
class ConditionTest(unittest.TestCase):
    """
    _ConditionTest_

    Test for the Condition job splitter
    """
    def setUp(self):
        """
        _setUp_

        """
        import WMQuality.TestInit
        WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn) 
                                    VALUES (1, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 1)
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': 1}, transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname="RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN="SomePNN",
                                  binds={
                                      'RUN': 1,
                                      'P5_ID': 1,
                                      'LUMI': 1,
                                      'STREAM': "Express",
                                      'TIME': int(time.time()),
                                      'LFN': "/streamer",
                                      'FILESIZE': 0,
                                      'EVENTS': 0
                                  },
                                  transaction=False)

        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute(
            {
                'RUN': 1,
                'STREAM': "Express",
                'NUM_PRODUCER': 1
            },
            transaction=False)

        self.completeFilesDAO = wmbsDaoFactory(
            classname="Subscriptions.CompleteFiles")
        self.markPromptCalibrationFinishedDAO = daoFactory(
            classname="ConditionUpload.MarkPromptCalibrationFinished")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Condition",
                                          type="Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        alcaRecoFile = File("/alcareco", size=0, events=0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomePNN", immediateSave=False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size=0, events=0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomePNN", immediateSave=False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size=0, events=0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details
                                              """,
                                           transaction=False)[0].fetchall()

        setParentageDAO = wmbsDaoFactory(classname="Files.SetParentage")
        setParentageDAO.execute(binds=[{
            'parent': "/streamer",
            'child': "/alcareco"
        }, {
            'parent': "/alcareco",
            'child': "/alcaprompt"
        }, {
            'parent': "/alcaprompt",
            'child': "/sqlite"
        }],
                                transaction=False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def isPromptCalibFinished(self):
        """
        _isPromptCalibFinished_

        """
        myThread = threading.currentThread()

        result = myThread.dbi.processData("""SELECT finished
                                             FROM prompt_calib
                                             """,
                                          transaction=False)[0].fetchall()[0][0]

        return result

    def countPromptCalibFiles(self):
        """
        _countPromptCalibFiles_

        """
        myThread = threading.currentThread()

        result = myThread.dbi.processData("""SELECT COUNT(*)
                                             FROM prompt_calib_file
                                             """,
                                          transaction=False)[0].fetchall()[0][0]

        return result
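
isPromptCalibFinished and countPromptCalibFiles differ only in the SQL
they run; both reduce to "execute a query, return the first scalar". A
sketch of a shared helper (the name is ours):

    def scalarQuery(self, sql):
        """Run sql outside a transaction and return the first column of
        the first row."""
        myThread = threading.currentThread()
        return myThread.dbi.processData(
            sql, transaction=False)[0].fetchall()[0][0]

    # e.g. self.scalarQuery("SELECT COUNT(*) FROM prompt_calib_file")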

    def test00(self):
        """
        _test00_

        Make sure the job splitter behaves correctly.

        The job splitter should do nothing while the fileset is open and
        populate the t0ast data structures once it is closed.  In the
        latter case all input files should be marked as acquired without
        creating a job.

        """
        mySplitArgs = self.splitArgs.copy()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")

        self.assertEqual(self.countPromptCalibFiles(), 0,
                         "ERROR: there should be no prompt_calib_file")

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(self.isPromptCalibFinished(), 0,
                         "ERROR: prompt_calib should not be finished")

        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")

        self.completeFilesDAO.execute(1, 4, transaction=False)
        self.markPromptCalibrationFinishedDAO.execute(1, 1, transaction=False)

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.assertEqual(self.isPromptCalibFinished(), 1,
                         "ERROR: prompt_calib should be finished")

        self.assertEqual(self.countPromptCalibFiles(), 1,
                         "ERROR: there should be one prompt_calib_file")

        return
Example #40
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                     transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")                                                      
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return
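
The five InsertLumiSection calls above issue one execute per lumi. The
Files.SetParentage DAO elsewhere in these tests accepts a list of binds;
assuming this DAO does too (an assumption, not verified here), the same
setup collapses to one call:

    # Assumption: the DAO accepts a list of binds, like Files.SetParentage.
    lumiBinds = [{'RUN': 1, 'LUMI': lumi} for lumi in range(1, 6)]
    insertLumiDAO.execute(binds=lumiBinds, transaction=False)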
Example #41
    def stuffWMBS(self, injected=True):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                                 owner="Steve", task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                      owner="Steve", task="Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset=inputFileset,
                                              workflow=bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn="parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription=bogusInputSubscription)
        jobGroup3.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob1A = Job()
        testJob1A.addFile(parentFile1)
        testJob1A.create(jobGroup3)
        testJob1A["state"] = "cleanout"
        testJob1A["oldstate"] = "new"
        testJob1A["couch_record"] = "somejive"
        testJob1A["retry_count"] = 0
        testJob1A["outcome"] = "failure"
        testJob1A.save()
        changeStateDAO.execute([testJob1A])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        testJob7 = Job()
        testJob7.addFile(self.parentFileSite2)
        testJob7.create(jobGroup2)
        testJob7["state"] = "cleanout"
        testJob7["oldstate"] = "new"
        testJob7["couch_record"] = "somejive"
        testJob7["retry_count"] = 0
        testJob7["outcome"] = "success"
        testJob7.save()
        changeStateDAO.execute([testJob7])

        badFile1 = File(lfn="badFile1", size=10241024, events=10241024,
                        first_event=0, locations={"T2_CH_CERN"})
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4", size=1024, events=1024,
                     first_event=3072, locations={"T2_CH_CERN"})
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII", size=1024, events=1024,
                      first_event=1024, locations={"T2_CH_CERN"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII", size=1024, events=1024,
                       first_event=2048, locations={"T2_CH_CERN"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV", size=1024, events=1024,
                      first_event=3072, locations={"T2_CH_CERN"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                        fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]:
            self.mergeFileset.addFile(fileObj)
            self.bogusFileset.addFile(fileObj)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
Example #42
class WMBSMergeBySize(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Boilerplate DB setup.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self, injected=True):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups, each
        containing several files, are injected, along with another, incomplete
        job group.  Files are also added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bunk2",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=injected)
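        # Merge jobs are only created for workflows that have been marked as
        # injected; testForcedMerge exercises the injected=False case.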

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="WMBSMergeBySize")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow", spec="input",
                                 owner="Steve", task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("output", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("output2", self.bogusFileset,
                                self.bogusMergedFileset)
        bogusInputWorkflow = Workflow(name="bogusInputWorkflow", spec="input",
                                      owner="Steve", task="Test")
        bogusInputWorkflow.create()

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()
        bogusInputSubscription = Subscription(fileset=inputFileset,
                                              workflow=bogusInputWorkflow)
        bogusInputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()
        self.parentFileSite2 = File(lfn="parentFileSite2")
        self.parentFileSite2.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()
        jobGroup3 = JobGroup(subscription=bogusInputSubscription)
        jobGroup3.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob1A = Job()
        testJob1A.addFile(parentFile1)
        testJob1A.create(jobGroup3)
        testJob1A["state"] = "cleanout"
        testJob1A["oldstate"] = "new"
        testJob1A["couch_record"] = "somejive"
        testJob1A["retry_count"] = 0
        testJob1A["outcome"] = "failure"
        testJob1A.save()
        changeStateDAO.execute([testJob1A])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split-by-event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        testJob7 = Job()
        testJob7.addFile(self.parentFileSite2)
        testJob7.create(jobGroup2)
        testJob7["state"] = "cleanout"
        testJob7["oldstate"] = "new"
        testJob7["couch_record"] = "somejive"
        testJob7["retry_count"] = 0
        testJob7["outcome"] = "success"
        testJob7.save()
        changeStateDAO.execute([testJob7])

        badFile1 = File(lfn="badFile1", size=10241024, events=10241024,
                        first_event=0, locations={"T2_CH_CERN"})
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4", size=1024, events=1024,
                     first_event=3072, locations={"T2_CH_CERN"})
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII", size=1024, events=1024,
                      first_event=1024, locations={"T2_CH_CERN"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII", size=1024, events=1024,
                       first_event=2048, locations={"T2_CH_CERN"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV", size=1024, events=1024,
                      first_event=3072, locations={"T2_CH_CERN"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA", size=1024, events=1024,
                     first_event=0, locations={"T2_CH_CERN"})
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB", size=1024, events=1024,
                     first_event=1024, locations={"T2_CH_CERN"})
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC", size=1024, events=1024,
                     first_event=2048, locations={"T2_CH_CERN"})
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for fileObj in [file1, file2, file3, file4, fileA, fileB, fileC, fileI,
                        fileII, fileIII, fileIV, fileX, fileY, fileZ, badFile1]:
            self.mergeFileset.addFile(fileObj)
            self.bogusFileset.addFile(fileObj)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
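
    # For reference in the tests below: the good mergeable files fall into
    # three groups, file1-file4 (4 x 1024 = 4096 bytes), fileA-fileC
    # (3072 bytes) and fileI-fileIV (4096 bytes), each file carrying 1024
    # events.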

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Set the minimum merge size to 20,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance.  Verify that no merge jobs
        are produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=20000, max_merge_size=2000000000,
                            max_merge_events=200000000)

        assert len(result) == 0, \
            "ERROR: No job groups should be returned."

        return

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to 20,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance, and mark the fileset as
        closed.  Verify that all mergeable files are pushed out, with one
        merge job per parent job group.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=20000, max_merge_size=200000,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: Expected one JobGroup, got: %s" % len(result)

        assert len(result[0].jobs) == 2, \
            "Error: Two jobs should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                        "fileC"]
        goldenFilesB = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

            jobFiles = job.getFiles()

            if len(jobFiles) == len(goldenFilesA):
                self.assertEqual(job["estimatedDiskUsage"], 7)
                goldenFiles = goldenFilesA
            else:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesB

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for fileObj in jobFiles:
                fileObj.loadData()
                assert fileObj["lfn"] in goldenFiles, \
                    "Error: Unknown file: %s" % fileObj["lfn"]
                goldenFiles.remove(fileObj["lfn"])

                fileRun = list(fileObj["runs"])[0].run
                fileLumi = min(list(fileObj["runs"])[0])
                fileEvent = fileObj["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        return
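
    # The run/lumi/first_event ordering checks repeated in these tests could
    # be factored into a helper along these lines.  This is a sketch; the
    # name assertFilesOrdered is not part of the original suite, and it
    # assumes Python tuple comparison matches the intended lexicographic
    # order.
    def assertFilesOrdered(self, jobFiles):
        """
        Assert that jobFiles are sorted by run, then lumi, then first event.
        """
        lastKey = (0, 0, 0)
        for fileObj in jobFiles:
            run = list(fileObj["runs"])[0]
            # Compare (run number, lowest lumi, first event) lexicographically.
            key = (run.run, min(run), fileObj["first_event"])
            assert key >= lastKey, \
                "ERROR: Files not ordered by run/lumi/event: %s" % fileObj["lfn"]
            lastKey = key
        return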

    def testMinMergeSize2(self):
        """
        _testMinMergeSize2_

        Set the minimum merge size to 7,167 bytes, which is one byte less
        than the sum of all the file sizes in the largest merge group in the
        WMBS instance.  Verify that one merge job containing all the files in
        the largest merge group is produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=7167, max_merge_size=20000,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: Expected one JobGroup, got: %d" % len(result)

        assert len(result[0].jobs) == 1, \
            "ERROR: One job should have been returned."

        self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7)

        self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        jobFiles = list(result[0].jobs)[0].getFiles()

        goldenFiles = ["file1", "file2", "file3", "file4", "fileA", "fileB",
                       "fileC"]

        assert len(jobFiles) == len(goldenFiles), \
            "ERROR: Merge job should contain %d files." % len(goldenFiles)

        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for fileObj in jobFiles:
            assert fileObj["lfn"] in goldenFiles, \
                "Error: Unknown file: %s" % fileObj["lfn"]
            goldenFiles.remove(fileObj["lfn"])

            fileRun = list(fileObj["runs"])[0].run
            fileLumi = min(list(fileObj["runs"])[0])
            fileEvent = fileObj["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            assert fileRun >= currentRun, \
                "ERROR: Files not sorted by run."

            if fileRun == currentRun:
                assert fileLumi >= currentLumi, \
                    "ERROR: Files not ordered by lumi"

                if fileLumi == currentLumi:
                    assert fileEvent >= currentEvent, \
                        "ERROR: Files not ordered by first event"

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        return

    def testMaxMergeSize1(self):
        """
        _testMaxMergeSize1_

        Set the maximum merge size to two bytes.  Verify that three merge
        jobs are created, one for each merge group in the WMBS instance, and
        that each merge job contains the expected files.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=2,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: Expected one JobGroup, got: %s" % result

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:
            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 3)
                goldenFiles = goldenFilesB
            else:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for fileObj in jobFiles:
                assert fileObj["lfn"] in goldenFiles, \
                    "Error: Unknown file in merge jobs."
                goldenFiles.remove(fileObj["lfn"])

                fileRun = list(fileObj["runs"])[0].run
                fileLumi = min(list(fileObj["runs"])[0])
                fileEvent = fileObj["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \
               len(goldenFilesC) == 0, \
            "ERROR: Files missing from merge jobs."

        return

    def testMaxMergeSize2(self):
        """
        _testMaxMergeSize2_

        Set the minimum merge size to one byte more than the largest merge
        group in the WMBS instance and the max merge size to one byte more
        than the combined size of two of the groups.  Verify that one merge
        job containing two of the merge groups is produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=7169,
                            max_merge_events=20000)

        assert len(result) == 1, \
            "ERROR: Expected exactly one JobGroup."

        assert len(result[0].jobs) == 1, \
            "ERROR: One job should have been returned."

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7)

        self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        jobFiles = list(result[0].jobs)[0].getFiles()

        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for fileObj in jobFiles:

            if fileObj["lfn"] in goldenFilesA:
                goldenFilesA.remove(fileObj["lfn"])
            elif fileObj["lfn"] in goldenFilesB:
                goldenFilesB.remove(fileObj["lfn"])
            elif fileObj["lfn"] in goldenFilesC:
                goldenFilesC.remove(fileObj["lfn"])

            fileRun = list(fileObj["runs"])[0].run
            fileLumi = min(list(fileObj["runs"])[0])
            fileEvent = fileObj["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            assert fileRun >= currentRun, \
                "ERROR: Files not sorted by run."

            if fileRun == currentRun:
                assert fileLumi >= currentLumi, \
                    "ERROR: Files not ordered by lumi"

                if fileLumi == currentLumi:
                    assert fileEvent >= currentEvent, \
                        "ERROR: Files not ordered by first event"

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        assert len(goldenFilesB) == 0 and \
               (len(goldenFilesA) == 0 or len(goldenFilesC) == 0), \
            "ERROR: Files not allocated to jobs correctly."

        return

    def testMaxEvents1(self):
        """
        _testMaxEvents1_

        Set the maximum number of events per merge job to 1.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=1)

        assert len(result) == 1, \
            "ERROR: Expected one JobGroup, got: %s" % result

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned: %s" % len(result[0].jobs)

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 3)
                goldenFiles = goldenFilesB
            else:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for fileObj in jobFiles:
                assert fileObj["lfn"] in goldenFiles, \
                    "Error: Unknown file in merge jobs."
                goldenFiles.remove(fileObj["lfn"])

                fileRun = list(fileObj["runs"])[0].run
                fileLumi = min(list(fileObj["runs"])[0])
                fileEvent = fileObj["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run: %s, %s" % (fileRun, currentRun)

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \
               len(goldenFilesC) == 0, \
            "ERROR: Files missing from merge jobs."

        return

    def testMaxEvents2(self):
        """
        _testMaxEvents2_

        Set the minimum merge size to one byte more than the largest merge
        group in the WMBS instance and the max merge events to one more than
        the total events in two of the groups.  Verify that one merge job
        containing two of the merge groups is produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, \
            "ERROR: Expected exactly one JobGroup."

        assert len(result[0].jobs) == 1, \
            "ERROR: One job should have been returned."

        self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 7)

        self.assertEqual(result[0].jobs[0]["possiblePSN"], {"T1_US_FNAL", "T2_CH_CERN"})

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        jobFiles = list(result[0].jobs)[0].getFiles()

        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for fileObj in jobFiles:

            if fileObj["lfn"] in goldenFilesA:
                goldenFilesA.remove(fileObj["lfn"])
            elif fileObj["lfn"] in goldenFilesB:
                goldenFilesB.remove(fileObj["lfn"])
            elif fileObj["lfn"] in goldenFilesC:
                goldenFilesC.remove(fileObj["lfn"])

            fileRun = list(fileObj["runs"])[0].run
            fileLumi = min(list(fileObj["runs"])[0])
            fileEvent = fileObj["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            assert fileRun >= currentRun, \
                "ERROR: Files not sorted by run."

            if fileRun == currentRun:
                assert fileLumi >= currentLumi, \
                    "ERROR: Files not ordered by lumi"

                if fileLumi == currentLumi:
                    assert fileEvent >= currentEvent, \
                        "ERROR: Files not ordered by first event"

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        assert len(goldenFilesB) == 0 and \
               (len(goldenFilesA) == 0 or len(goldenFilesC) == 0), \
            "ERROR: Files not allocated to jobs correctly."

        return

    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        mergeFilesetA = Fileset(name="mergeFilesetA")
        mergeFilesetB = Fileset(name="mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputFileA = File(lfn="inputLFNA")
        inputFileB = File(lfn="inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                                 owner="Steve", task="Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                                 owner="Steve", task="Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowA,
                                         split_algo="EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowB,
                                         split_algo="EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription=procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription=procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)
        jobGroupB.output.commit()

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mergeSubscriptionB)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 0, \
            "Error: No merge jobs should have been created."

        fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)
        jobGroupB.output.commit()

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])
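
        # With every parent processing job now successful, the factory should
        # produce exactly one merge job.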

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, \
            "Error: One merge job should have been created: %s" % len(result)

        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing at different sites (PNNs) are not merged
        together in the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileSite2", size=4098, events=1024,
                         first_event=0, locations={"T1_UK_RAL_Disk"})
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()
        fileSite2.addParent(self.parentFileSite2["lfn"])

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097, max_merge_size=99999999,
                            max_merge_events=999999999)

        assert len(result) == 1, \
            "ERROR: Expected exactly one JobGroup."

        assert len(result[0].jobs) == 2, \
            "ERROR: Two jobs should have been returned."

        ralJobs = 0
        fnalcernJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == {"T1_UK_RAL"}:
                ralJobs += 1
            elif job["possiblePSN"] == {"T1_US_FNAL", "T2_CH_CERN"}:
                fnalcernJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalcernJobs, 1)

        return

    def testFilesetCloseout(self):
        """
        _testFilesetCloseout_

        Verify that the merge algorithm works correctly when its input fileset
        is closed.  The split algorithm should create merge jobs for all files
        regardless of size and then mark any orphaned files (files resulting
        from a split-by-lumi / split-by-event job where one of the parent
        processing jobs failed while the others succeeded) as failed so that
        the fileset can be closed out.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        # Get all the good merge jobs out of the way.
        result = jobFactory(min_merge_size=1, max_merge_size=999999999999,
                            max_merge_events=999999999)

        # Verify that the bad files are the only "available" files
        availableAction = self.daoFactory(classname="Subscriptions.GetAvailableFilesMeta")
        availFiles = availableAction.execute(self.mergeSubscription["id"])

        assert len(availFiles) == 4, \
            "Error: Wrong number of available files."

        goldenFiles = ["badFile1", "badFileA", "badFileB", "badFileC"]
        for availFile in availFiles:
            assert availFile["lfn"] in goldenFiles, \
                "Error: Extra file is available."

            goldenFiles.remove(availFile["lfn"])

        self.mergeFileset.markOpen(False)
        result = jobFactory(min_merge_size=1, max_merge_size=999999999999,
                            max_merge_events=999999999)

        assert len(result) == 0, \
            "Error: Merging should have returned zero jobs."

        self.mergeFileset.markOpen(False)

        availFiles2 = availableAction.execute(self.mergeSubscription["id"])

        assert len(availFiles2) == 0, \
            "Error: There should be no more available files."

        failedAction = self.daoFactory(classname="Subscriptions.GetFailedFiles")
        failedFiles = failedAction.execute(self.mergeSubscription["id"])

        assert len(failedFiles) == 4, \
            "Error: Wrong number of failed files: %s" % failedFiles

        goldenIDs = []
        for availFile in availFiles:
            goldenIDs.append(availFile["id"])

        for failedFile in failedFiles:
            assert failedFile["file"] in goldenIDs, \
                "Error: Extra failed file."

        return

    def testFilesetCloseout2(self):
        """
        _testFilesetCloseout2_

        Verify that the orphaned-file failure code does not fail files that
        have already failed for other workflows.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        # Get all the good merge jobs out of the way.
        result = jobFactory(min_merge_size=1, max_merge_size=999999999999,
                            max_merge_events=999999999)

        self.assertEqual(len(result), 1, "Error: Wrong number of job groups.")
        self.assertEqual(len(result[0].jobs), 2, "Error: Wrong number of jobs.")

        failedAction = self.daoFactory(classname="Subscriptions.GetFailedFiles")
        failedFiles = failedAction.execute(self.mergeSubscription["id"])

        self.assertEqual(len(failedFiles), 4,
                         "Error: Wrong number of failed files: %s" % failedFiles)
        return

    def testForcedMerge(self):
        """
        _testForcedMerge_

        Repeat testMinMergeSize1a, but with a non-injected workflow, and
        verify that no job groups are created.
        """
        self.stuffWMBS(injected=False)
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=20000, max_merge_size=200000,
                            max_merge_events=20000)

        self.assertEqual(len(result), 0)

        return
Example #43
    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert dummy filesets, files, workflows and subscriptions into WMBS to
        test job creation.  Files are added both to the "Mergeable"
        subscription's input fileset and to a bogus fileset.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=True)

        self.mergeSubscription = Subscription(
            fileset=self.mergeFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(
            fileset=self.bogusFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T1_US_FNAL_Disk"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T1_US_FNAL_Disk"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations=set(["T1_US_FNAL_Disk"]))
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations=set(["T1_US_FNAL_Disk"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=102400,
                       first_event=2048,
                       locations=set(["T1_US_FNAL_Disk"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV",
                      size=102400,
                      events=1024,
                      first_event=3072,
                      locations=set(["T1_US_FNAL_Disk"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
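        # Note the outliers above: fileIII carries 100x the events and fileIV
        # 100x the size of the other files, presumably to trip the event and
        # size limits in the ParentlessMergeBySize tests.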

        for jobFile in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV
        ]:
            self.mergeFileset.addFile(jobFile)
            self.bogusFileset.addFile(jobFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return
Example #44
    def setupForKillTest(self, baAPI=None):
        """
        _setupForKillTest_

        Inject a workflow into WMBS that has a processing task, a merge task and
        a cleanup task.  Inject files into the various tasks at various
        processing states (acquired, complete, available...).  Also create jobs
        for each subscription in various states.
        """
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daoFactory(classname="Locations.New")
        changeStateAction = daoFactory(classname="Jobs.ChangeState")
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   pnn='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertThreshold(siteName='site1', taskType='Processing',
                                        maxSlots=10000, pendingSlots=10000)

        userDN = 'someDN'
        userAction = daoFactory(classname="Users.New")
        userAction.execute(dn=userDN,
                           group_name='DEFAULT',
                           role_name='DEFAULT')

        inputFileset = Fileset("input")
        inputFileset.create()

        inputFileA = File("lfnA", locations="goodse.cern.ch")
        inputFileB = File("lfnB", locations="goodse.cern.ch")
        inputFileC = File("lfnC", locations="goodse.cern.ch")
        inputFileA.create()
        inputFileB.create()
        inputFileC.create()

        inputFileset.addFile(inputFileA)
        inputFileset.addFile(inputFileB)
        inputFileset.addFile(inputFileC)
        inputFileset.commit()

        unmergedOutputFileset = Fileset("unmerged")
        unmergedOutputFileset.create()

        unmergedFileA = File("ulfnA", locations="goodse.cern.ch")
        unmergedFileB = File("ulfnB", locations="goodse.cern.ch")
        unmergedFileC = File("ulfnC", locations="goodse.cern.ch")
        unmergedFileA.create()
        unmergedFileB.create()
        unmergedFileC.create()

        unmergedOutputFileset.addFile(unmergedFileA)
        unmergedOutputFileset.addFile(unmergedFileB)
        unmergedOutputFileset.addFile(unmergedFileC)
        unmergedOutputFileset.commit()

        mainProcWorkflow = Workflow(spec="spec1",
                                    owner="Steve",
                                    name="Main",
                                    task="Proc")
        mainProcWorkflow.create()
        mainProcMergeWorkflow = Workflow(spec="spec1",
                                         owner="Steve",
                                         name="Main",
                                         task="ProcMerge")
        mainProcMergeWorkflow.create()
        mainCleanupWorkflow = Workflow(spec="spec1",
                                       owner="Steve",
                                       name="Main",
                                       task="Cleanup")
        mainCleanupWorkflow.create()

        self.mainProcSub = Subscription(fileset=inputFileset,
                                        workflow=mainProcWorkflow,
                                        type="Processing")
        self.mainProcSub.create()
        self.mainProcSub.acquireFiles(inputFileA)
        self.mainProcSub.completeFiles(inputFileB)

        procJobGroup = JobGroup(subscription=self.mainProcSub)
        procJobGroup.create()
        self.procJobA = Job(name="ProcJobA")
        self.procJobA["state"] = "new"
        self.procJobA["location"] = "site1"
        self.procJobB = Job(name="ProcJobB")
        self.procJobB["state"] = "executing"
        self.procJobB["location"] = "site1"
        self.procJobC = Job(name="ProcJobC")
        self.procJobC["state"] = "complete"
        self.procJobC["location"] = "site1"
        self.procJobA.create(procJobGroup)
        self.procJobB.create(procJobGroup)
        self.procJobC.create(procJobGroup)

        self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                         workflow=mainProcMergeWorkflow,
                                         type="Merge")
        self.mainMergeSub.create()
        self.mainMergeSub.acquireFiles(unmergedFileA)
        self.mainMergeSub.failFiles(unmergedFileB)

        mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
        mergeJobGroup.create()
        self.mergeJobA = Job(name="MergeJobA")
        self.mergeJobA["state"] = "exhausted"
        self.mergeJobA["location"] = "site1"
        self.mergeJobB = Job(name="MergeJobB")
        self.mergeJobB["state"] = "cleanout"
        self.mergeJobB["location"] = "site1"
        self.mergeJobC = Job(name="MergeJobC")
        self.mergeJobC["state"] = "new"
        self.mergeJobC["location"] = "site1"
        self.mergeJobA.create(mergeJobGroup)
        self.mergeJobB.create(mergeJobGroup)
        self.mergeJobC.create(mergeJobGroup)

        self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                           workflow=mainCleanupWorkflow,
                                           type="Cleanup")
        self.mainCleanupSub.create()
        self.mainCleanupSub.acquireFiles(unmergedFileA)
        self.mainCleanupSub.completeFiles(unmergedFileB)

        cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
        cleanupJobGroup.create()
        self.cleanupJobA = Job(name="CleanupJobA")
        self.cleanupJobA["state"] = "new"
        self.cleanupJobA["location"] = "site1"
        self.cleanupJobB = Job(name="CleanupJobB")
        self.cleanupJobB["state"] = "executing"
        self.cleanupJobB["location"] = "site1"
        self.cleanupJobC = Job(name="CleanupJobC")
        self.cleanupJobC["state"] = "complete"
        self.cleanupJobC["location"] = "site1"
        self.cleanupJobA.create(cleanupJobGroup)
        self.cleanupJobB.create(cleanupJobGroup)
        self.cleanupJobC.create(cleanupJobGroup)

        jobList = [
            self.procJobA, self.procJobB, self.procJobC, self.mergeJobA,
            self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB,
            self.cleanupJobC
        ]

        changeStateAction.execute(jobList)

        if baAPI:
            for job in jobList:
                job['plugin'] = 'TestPlugin'
                job['userdn'] = userDN
                job['usergroup'] = 'DEFAULT'
                job['userrole'] = 'DEFAULT'
                job['custom']['location'] = 'site1'
            baAPI.createNewJobs(wmbsJobs=jobList)

        # We'll create an unrelated workflow to verify that it isn't affected
        # by the killing code.
        bogusFileset = Fileset("dontkillme")
        bogusFileset.create()

        bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
        bogusFileA.create()
        bogusFileset.addFile(bogusFileA)
        bogusFileset.commit()

        bogusWorkflow = Workflow(spec="spec2",
                                 owner="Steve",
                                 name="Bogus",
                                 task="Proc")
        bogusWorkflow.create()
        self.bogusSub = Subscription(fileset=bogusFileset,
                                     workflow=bogusWorkflow,
                                     type="Processing")
        self.bogusSub.create()
        self.bogusSub.acquireFiles(bogusFileA)
        return
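
        # Hedged sketch (not in the original snippet): the verification this
        # setup builds toward presumably looks like the lines below. The
        # killWorkflow helper and the config names are assumptions inferred
        # from the comment above, not confirmed by this snippet.
        #
        #   killWorkflow("Main", jobCouchConfig, bossAirConfig)
        #   self.assertEqual(len(self.bogusSub.filesOfStatus("Acquired")), 1,
        #                    "Error: Unrelated subscription was touched.")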
Example #45
0
    def testRetryCount(self):
        """
        _testRetryCount_

        Verify that the retry count is incremented when we move out of the
        submitcooloff or jobcooloff state.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname = "Locations.New")
        locationAction.execute("site1", pnn = "T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                                name="wf001", task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        for i in range(4):
            newFile = File(lfn="File%s" % i, locations=set(["T2_CH_CERN"]))
            newFile.create()
            testFileset.addFile(newFile)

        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="FileBased")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 4, \
               "Error: Splitting should have created four jobs."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"
        testJobB = jobGroup.jobs[1]
        testJobB["user"] = "******"
        testJobB["group"] = "DMWM"
        testJobB["taskType"] = "Processing"
        testJobC = jobGroup.jobs[2]
        testJobC["user"] = "******"
        testJobC["group"] = "DMWM"
        testJobC["taskType"] = "Processing"
        testJobD = jobGroup.jobs[3]
        testJobD["user"] = "******"
        testJobD["group"] = "DMWM"
        testJobD["taskType"] = "Processing"

        change.persist([testJobA], "created", "submitcooloff")
        change.persist([testJobB], "created", "jobcooloff")
        change.persist([testJobC, testJobD], "new", "none")

        testJobA.load()
        testJobB.load()
        testJobC.load()
        testJobD.load()

        assert testJobA["retry_count"] == 1, \
               "Error: Retry count is wrong."
        assert testJobB["retry_count"] == 1, \
               "Error: Retry count is wrong."
        assert testJobC["retry_count"] == 0, \
               "Error: Retry count is wrong."
        assert testJobD["retry_count"] == 0, \
               "Error: Retry count is wrong."

        return
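
        # A minimal sketch of the semantics asserted above (inferred from the
        # assertions, not taken from the ChangeState source): only transitions
        # out of a cooloff state increment the retry count.
        #
        #   def nextRetryCount(oldState, retryCount):
        #       return retryCount + 1 if oldState.endswith("cooloff") else retryCount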
Example #46
0
    def testAnalysis(self):
        """
        _testAnalysis_
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchUrl"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "analysis_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig(
        )
        defaultArguments["ProcessingVersion"] = 1

        analysisProcessingFactory = AnalysisWorkloadFactory()
        testWorkload = analysisProcessingFactory("TestWorkload",
                                                 defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Analysis", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/Analysis")
        procWorkflow.load()
        self.assertEqual(len(procWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        # Analysis does not actually have a merge task, so the "merged"
        # output fileset is the same as the unmerged one.
        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        output = procWorkflow.outputMap["output"][0]["output_fileset"]
        mergedOutput = procWorkflow.outputMap["output"][0][
            "merged_output_fileset"]
        output.loadData()
        mergedOutput.loadData()
        self.assertEqual(
            output.name, "/TestWorkload/Analysis/unmerged-output",
            "Error: Unmerged output fileset is wrong: " + output.name)
        self.assertEqual(
            mergedOutput.name, "/TestWorkload/Analysis/unmerged-output",
            "Error: Merged output fileset is wrong: " + mergedOutput.name)

        topLevelFileset = Fileset(name="TestWorkload-Analysis-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "Analysis",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/Analysis/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload", task="/TestWorkload/Analysis/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
Example #47
0
    def testDuplicateJobReports(self):
        """
        _testDuplicateJobReports_

        Verify that everything works correctly if a job report is added to the
        database more than once.
        """
        change = ChangeState(self.config, "changestate_t")

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute("site1", pnn="T2_CH_CERN")

        testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                                name="wf001", task=self.taskName)
        testWorkflow.create()
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testFile = File(lfn="SomeLFNC", locations=set(["T2_CH_CERN"]))
        testFile.create()
        testFileset.addFile(testFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroup = jobFactory(files_per_job=1)[0]

        assert len(jobGroup.jobs) == 1, \
               "Error: Splitting should have created one job."

        testJobA = jobGroup.jobs[0]
        testJobA["user"] = "******"
        testJobA["group"] = "DMWM"
        testJobA["taskType"] = "Processing"

        change.propagate([testJobA], 'created', 'new')
        myReport = Report()
        reportPath = os.path.join(getTestBase(),
                                  "WMCore_t/JobStateMachine_t/Report.pkl")
        myReport.unpersist(reportPath)
        testJobA["fwjr"] = myReport

        change.propagate([testJobA], 'executing', 'created')
        change.propagate([testJobA], 'executing', 'created')

        changeStateDB = self.couchServer.connectDatabase(dbname="changestate_t/fwjrs")
        allDocs = changeStateDB.document("_all_docs")

        self.assertEqual(len(allDocs["rows"]), 2,
                         "Error: Wrong number of documents")

        for resultRow in allDocs["rows"]:
            if resultRow["id"] != "_design/FWJRDump":
                changeStateDB.document(resultRow["id"])
                break

        return
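
        # Hedged sketch: with one _design/FWJRDump document in the database,
        # the two-row count above implies exactly one FWJR document survived
        # the duplicate propagate calls. The equivalent explicit check:
        #
        #   fwjrDocs = [row for row in allDocs["rows"]
        #               if not row["id"].startswith("_design")]
        #   self.assertEqual(len(fwjrDocs), 1)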
Example #48
0
    def testPromptRecoWithSkims(self):
        """
        _testPromptRecoWithSkims_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["EnableHarvesting"] = True
        factory = PromptRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Reco",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()),
                         len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = {
            "write_RECO": "RECO",
            "write_ALCARECO": "ALCARECO",
            "write_AOD": "AOD",
            "write_DQM": "DQM"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergedOutput = recoWorkflow.outputMap[fset][0][
                "merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" %
                    (goldenOutputMod, tier),
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            fset = goldenOutputMod + "ALCARECO"
            mergedOutput = alcaSkimWorkflow.outputMap[fset][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[fset][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" %
                goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        dqmWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = {
            "write_RECO": "RECO",
            "write_AOD": "AOD",
            "write_DQM": "DQM"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" %
                                     goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap[
                "Merged%s" % tier][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" %
                (goldenOutputMod, tier),
                "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" %
                (goldenOutputMod, tier),
                "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            recoSubscription["split_algo"], "EventAwareLumiBased",
            "Error: Wrong split algorithm. %s" %
            recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                       workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        unmergedOutputs = {
            "write_RECO": "RECO",
            "write_AOD": "AOD",
            "write_DQM": "DQM"
        }
        for unmergedOutput, tier in unmergedOutputs.items():
            fset = unmergedOutput + tier
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                       fset)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "ParentlessMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" %
                unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                             workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "ParentlessMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = {
            "write_RECO": "RECO",
            "write_AOD": "AOD",
            "write_DQM": "DQM"
        }
        for goldenOutputMod, tier in goldenOutputMods.items():
            fset = goldenOutputMod + tier
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                      fset)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" %
                goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect,
                                     workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(
                name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" %
                (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=recoMergeLogCollect,
                workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                     % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect"
                     % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=alcaSkimLogCollect,
                workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
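
        # The fileset paths checked above all follow one naming scheme; a
        # hedged helper capturing it, as a reading aid only (this is not a
        # WMCore API):
        #
        #   def mergedFilesetName(taskPath, outputMod, tier):
        #       taskName = taskPath.split("/")[-1]
        #       return "%s/%sMerge%s/merged-Merged%s" % (taskPath, taskName,
        #                                                outputMod, tier)
        #
        #   # mergedFilesetName("/TestWorkload/Reco", "write_AOD", "AOD")
        #   # -> "/TestWorkload/Reco/RecoMergewrite_AOD/merged-MergedAOD"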
Example #49
0
class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_

    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSWVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                      transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024

        return
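
    # Hedged reading of the thresholds above, inferred from the tests below
    # rather than from the RepackMerge source: lumis accumulate until the
    # group reaches minInputSize, subject to the maxInputSize, maxInputEvents
    # and maxInputFiles caps; a lumi too large to combine may still be merged
    # alone while under maxOverSize, and is split once it exceeds maxEdmSize.
    # Roughly:
    #
    #   def groupIsReady(size, events, nfiles, args):
    #       return (size >= args['minInputSize'] and
    #               size <= args['maxInputSize'] and
    #               events <= args['maxInputEvents'] and
    #               nfiles <= args['maxInputFiles'])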

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_

        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active
                                    """,
                                 transaction = False)

        return

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works

        Test max edm size threshold for single lumi

        small lumi, followed by over-large lumi
        expect 1 job for the small lumi and 2 jobs for the over-large one

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return

    def test01(self):
        """
        _test01_

        Test max size threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test02(self):
        """
        _test02_

        Test max event threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100 * lumi)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test03(self):
        """
        _test03_

        Test max input files threshold for single lumi

        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test04(self):
        """
        _test04_

        Test max size threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test05(self):
        """
        _test05_

        Test max event threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test06(self):
        """
        _test06_

        Test max input files threshold for multi lumi

        3 same size lumis

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test07(self):
        """
        _test07_

        Test over merge

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test08(self):
        """
        _test08_

        Test under merge (over merge size threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test09(self):
        """
        _test09_

        Test under merge (over merge event threshold)

        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #50
workload = factory(workloadName, arguments)
workloadPath = os.path.join(workloadName, workloadFile)
workload.setOwner("*****@*****.**")
workload.setSpecUrl(workloadPath)

# Build a sandbox using TaskMaker
taskMaker = TaskMaker(workload, os.path.join(os.getcwd(), workloadName))
taskMaker.skipSubscription = True
taskMaker.processWorkload()

workload.save(workloadPath)

myThread = threading.currentThread()
myThread.transaction.begin()
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name = workloadTask.getPathName())
    inputFileset.create()

    virtualFile = File(lfn = "%s-virtual-input" % workloadTask.getPathName(),
                       size = 0, events = numEvents,
                       locations = set(["cmssrm.fnal.gov", "storm-fe-cms.cr.cnaf.infn.it",
                                        "cmssrm-fzk.gridka.de", "srm2.grid.sinica.edu.tw",
                                        "srm-cms.gridpp.rl.ac.uk", "ccsrm.in2p3.fr",
                                        "srmcms.pic.es"]), merged = False)

    myRun = Run(runNumber = 1)
    myRun.appendLumi(1)
    virtualFile.addRun(myRun)
    virtualFile.create()
    inputFileset.addFile(virtualFile)
    inputFileset.commit()
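# A sketch (not part of the scraped example), mirroring the analogous driver
# in Example #52 below: the transaction opened above is still pending and
# needs a commit once every task has been processed.
myThread.transaction.commit()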
Example #51
    def createJobs(self):
        """
        _createJobs_

        Create test jobs in WMBS and BossAir
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "tapas",
                                name = makeUUID(), task = "Test")
        testWorkflow.create()

        testFilesetA = Fileset(name = "TestFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name = "TestFilesetB")
        testFilesetB.create()
        testFilesetC = Fileset(name = "TestFilesetC")
        testFilesetC.create()

        testFileA = File(lfn = "testFileA", locations = set(["testSE1", "testSE2"]))
        testFileA.create()
        testFilesetA.addFile(testFileA)
        testFilesetA.commit()
        testFilesetB.addFile(testFileA)
        testFilesetB.commit()
        testFilesetC.addFile(testFileA)
        testFilesetC.commit()

        testSubscriptionA = Subscription(fileset = testFilesetA,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscriptionA.create()
        testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1", "valid": True}])
        testSubscriptionB = Subscription(fileset = testFilesetB,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscriptionB.create()
        testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1", "valid": False}])
        testSubscriptionC = Subscription(fileset = testFilesetC,
                                        workflow = testWorkflow,
                                        type = "Merge")
        testSubscriptionC.create()

        testJobGroupA = JobGroup(subscription = testSubscriptionA)
        testJobGroupA.create()
        testJobGroupB = JobGroup(subscription = testSubscriptionB)
        testJobGroupB.create()
        testJobGroupC = JobGroup(subscription = testSubscriptionC)
        testJobGroupC.create()

        # Site1, Has been assigned a location and is complete.
        testJobA = Job(name = "testJobA", files = [testFileA])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroupA)
        testJobA["state"] = "success"

        # Site 1, Has been assigned a location and is incomplete.
        testJobB = Job(name = "testJobB", files = [testFileA])
        testJobB["couch_record"] = makeUUID()
        testJobB["cache_dir"] = self.tempDir
        testJobB.create(group = testJobGroupA)
        testJobB["state"] = "executing"
        runJobB = RunJob()
        runJobB.buildFromJob(testJobB)
        runJobB["status"] = "PEND"

        # Does not have a location, white listed to site 1
        testJobC = Job(name = "testJobC", files = [testFileA])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroupA)
        testJobC["state"] = "new"

        # Site 2, Has been assigned a location and is complete.
        testJobD = Job(name = "testJobD", files = [testFileA])
        testJobD["couch_record"] = makeUUID()
        testJobD.create(group = testJobGroupB)
        testJobD["state"] = "success"

        # Site 2, Has been assigned a location and is incomplete.
        testJobE = Job(name = "testJobE", files = [testFileA])
        testJobE["couch_record"] = makeUUID()
        testJobE.create(group = testJobGroupB)
        testJobE["state"] = "executing"
        runJobE = RunJob()
        runJobE.buildFromJob(testJobE)
        runJobE["status"] = "RUN"

        # Does not have a location, site 1 is blacklisted.
        testJobF = Job(name = "testJobF", files = [testFileA])
        testJobF["couch_record"] = makeUUID()
        testJobF.create(group = testJobGroupB)
        testJobF["state"] = "new"

        # Site 3, Has been assigned a location and is complete.
        testJobG = Job(name = "testJobG", files = [testFileA])
        testJobG["couch_record"] = makeUUID()
        testJobG.create(group = testJobGroupC)
        testJobG["state"] = "cleanout"

        # Site 3, Has been assigned a location and is incomplete.
        testJobH = Job(name = "testJobH", files = [testFileA])
        testJobH["couch_record"] = makeUUID()
        testJobH.create(group = testJobGroupC)
        testJobH["state"] = "new"

        # Site 3, Does not have a location.
        testJobI = Job(name = "testJobI", files = [testFileA])
        testJobI["couch_record"] = makeUUID()
        testJobI.create(group = testJobGroupC)
        testJobI["state"] = "new"

        # Site 3, Does not have a location and is in cleanout.
        testJobJ = Job(name = "testJobJ", files = [testFileA])
        testJobJ["couch_record"] = makeUUID()
        testJobJ.create(group = testJobGroupC)
        testJobJ["state"] = "cleanout"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC, testJobD,
                                          testJobE, testJobF, testJobG, testJobH,
                                          testJobI, testJobJ])

        self.insertRunJob.execute([runJobB, runJobE])

        setLocationAction = self.daoFactory(classname = "Jobs.SetLocation")
        setLocationAction.execute(testJobA["id"], "testSite1")
        setLocationAction.execute(testJobB["id"], "testSite1")
        setLocationAction.execute(testJobD["id"], "testSite1")
        setLocationAction.execute(testJobE["id"], "testSite2")
        setLocationAction.execute(testJobG["id"], "testSite1")
        setLocationAction.execute(testJobH["id"], "testSite1")

        return
Example #52
        myRun = Run(runNumber=dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return


myThread = threading.currentThread()
myThread.transaction.begin()
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name=workloadTask.getPathName())
    inputFileset.create()

    inputDataset = workloadTask.inputDataset()
    inputDatasetPath = "/%s/%s/%s" % (
        inputDataset.primary, inputDataset.processed, inputDataset.tier)
    injectFilesFromDBS(inputFileset, inputDatasetPath, options.RunWhitelist)

    myWMBSHelper = WMBSHelper(workload)
    myWMBSHelper._createSubscriptionsInWMBS(workloadTask.getPathName())

myThread.transaction.commit()
Example #53
class SplitFileBasedTest(unittest.TestCase):
    """
    _SplitFileBasedTest_

    Unit tests for the split file job splitting algorithm.
    """
    def setUp(self):
        """
        _setUp_

        Create database connection and load up the WMBS schema.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Two job groups are injected: one
        whose jobs all succeeded and one that also contains failed jobs whose
        output the splitter must ignore.  Files are added to the merge
        fileset as well as to the output fileset of their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        locationAction.execute("site1", pnn="T2_CH_CERN")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        self.mergeMergedFileset = Fileset(name="mergeMergedFileset")
        self.mergeMergedFileset.create()
        self.bogusMergedFileset = Fileset(name="bogusMergedFileset")
        self.bogusMergedFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()

        self.mergeSubscription = Subscription(fileset=self.mergeFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(fileset=self.bogusFileset,
                                              workflow=mergeWorkflow,
                                              split_algo="SplitFileBased")

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputWorkflow = Workflow(name="inputWorkflow",
                                 spec="input",
                                 owner="Steve",
                                 task="Test")
        inputWorkflow.create()
        inputWorkflow.addOutput("someOutput", self.mergeFileset,
                                self.mergeMergedFileset)
        inputWorkflow.addOutput("someOutput2", self.bogusFileset,
                                self.bogusMergedFileset)

        inputSubscription = Subscription(fileset=inputFileset,
                                         workflow=inputWorkflow)
        inputSubscription.create()

        parentFile1 = File(lfn="parentFile1")
        parentFile1.create()
        parentFile2 = File(lfn="parentFile2")
        parentFile2.create()
        parentFile3 = File(lfn="parentFile3")
        parentFile3.create()
        parentFile4 = File(lfn="parentFile4")
        parentFile4.create()

        jobGroup1 = JobGroup(subscription=inputSubscription)
        jobGroup1.create()
        jobGroup2 = JobGroup(subscription=inputSubscription)
        jobGroup2.create()

        testJob1 = Job()
        testJob1.addFile(parentFile1)
        testJob1.create(jobGroup1)
        testJob1["state"] = "cleanout"
        testJob1["oldstate"] = "new"
        testJob1["couch_record"] = "somejive"
        testJob1["retry_count"] = 0
        testJob1["outcome"] = "success"
        testJob1.save()
        changeStateDAO.execute([testJob1])

        testJob2 = Job()
        testJob2.addFile(parentFile2)
        testJob2.create(jobGroup1)
        testJob2["state"] = "cleanout"
        testJob2["oldstate"] = "new"
        testJob2["couch_record"] = "somejive"
        testJob2["retry_count"] = 0
        testJob2["outcome"] = "success"
        testJob2.save()
        changeStateDAO.execute([testJob2])

        testJob3 = Job()
        testJob3.addFile(parentFile3)
        testJob3.create(jobGroup2)
        testJob3["state"] = "cleanout"
        testJob3["oldstate"] = "new"
        testJob3["couch_record"] = "somejive"
        testJob3["retry_count"] = 0
        testJob3["outcome"] = "success"
        testJob3.save()
        changeStateDAO.execute([testJob3])

        testJob4 = Job()
        testJob4.addFile(parentFile4)
        testJob4.create(jobGroup2)
        testJob4["state"] = "cleanout"
        testJob4["oldstate"] = "new"
        testJob4["couch_record"] = "somejive"
        testJob4["retry_count"] = 0
        testJob4["outcome"] = "failure"
        testJob4.save()
        changeStateDAO.execute([testJob4])

        # We'll simulate a failed split by event job that the merger should
        # ignore.
        parentFile5 = File(lfn="parentFile5")
        parentFile5.create()

        testJob5 = Job()
        testJob5.addFile(parentFile5)
        testJob5.create(jobGroup2)
        testJob5["state"] = "cleanout"
        testJob5["oldstate"] = "new"
        testJob5["couch_record"] = "somejive"
        testJob5["retry_count"] = 0
        testJob5["outcome"] = "success"
        testJob5.save()
        changeStateDAO.execute([testJob5])

        testJob6 = Job()
        testJob6.addFile(parentFile5)
        testJob6.create(jobGroup2)
        testJob6["state"] = "cleanout"
        testJob6["oldstate"] = "new"
        testJob6["couch_record"] = "somejive"
        testJob6["retry_count"] = 0
        testJob6["outcome"] = "failure"
        testJob6.save()
        changeStateDAO.execute([testJob6])

        badFile1 = File(lfn="badFile1",
                        size=10241024,
                        events=10241024,
                        first_event=0,
                        locations=set(["T2_CH_CERN"]))
        badFile1.addRun(Run(1, *[45]))
        badFile1.create()
        badFile1.addParent(parentFile5["lfn"])

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file1.addParent(parentFile1["lfn"])
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file2.addParent(parentFile1["lfn"])
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file3.addParent(parentFile1["lfn"])
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations=set(["T2_CH_CERN"]))
        file4.addRun(Run(1, *[45]))
        file4.create()
        file4.addParent(parentFile1["lfn"])

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileA.addParent(parentFile2["lfn"])
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileB.addParent(parentFile2["lfn"])
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()
        fileC.addParent(parentFile2["lfn"])

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileI.addParent(parentFile3["lfn"])
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations=set(["T2_CH_CERN"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileII.addParent(parentFile3["lfn"])
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations=set(["T2_CH_CERN"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIII.addParent(parentFile3["lfn"])
        fileIV = File(lfn="fileIV",
                      size=1024,
                      events=1024,
                      first_event=3072,
                      locations=set(["T2_CH_CERN"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()
        fileIV.addParent(parentFile3["lfn"])

        fileX = File(lfn="badFileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T2_CH_CERN"]))
        fileX.addRun(Run(1, *[47]))
        fileX.create()
        fileX.addParent(parentFile4["lfn"])
        fileY = File(lfn="badFileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T2_CH_CERN"]))
        fileY.addRun(Run(1, *[47]))
        fileY.create()
        fileY.addParent(parentFile4["lfn"])
        fileZ = File(lfn="badFileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T2_CH_CERN"]))
        fileZ.addRun(Run(1, *[47]))
        fileZ.create()
        fileZ.addParent(parentFile4["lfn"])

        jobGroup1.output.addFile(file1)
        jobGroup1.output.addFile(file2)
        jobGroup1.output.addFile(file3)
        jobGroup1.output.addFile(file4)
        jobGroup1.output.addFile(fileA)
        jobGroup1.output.addFile(fileB)
        jobGroup1.output.addFile(fileC)
        jobGroup1.output.commit()

        jobGroup2.output.addFile(fileI)
        jobGroup2.output.addFile(fileII)
        jobGroup2.output.addFile(fileIII)
        jobGroup2.output.addFile(fileIV)
        jobGroup2.output.addFile(fileX)
        jobGroup2.output.addFile(fileY)
        jobGroup2.output.addFile(fileZ)
        jobGroup2.output.addFile(badFile1)
        jobGroup2.output.commit()

        for file in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV, fileX, fileY, fileZ, badFile1
        ]:
            self.mergeFileset.addFile(file)
            self.bogusFileset.addFile(file)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return

    def testSplitAlgo(self):
        """
        _testSplitAlgo_

        Run the SplitFileBased splitting algorithm over the data created in
        the merge subscription.  This should produce three job groups each
        containing one job.  The files in the job should be ordered correctly.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory()

        assert len(result) == 3, \
               "ERROR: Wrong number of job groups returned: %s" % len(result)

        for jobGroup in result:
            assert len(jobGroup.jobs) == 1, \
                   "ERROR: One job should be in a job group."

        goldenFilesA = ["file1", "file2", "file3", "file4"]
        goldenFilesB = ["fileA", "fileB", "fileC"]
        goldenFilesC = ["fileI", "fileII", "fileIII", "fileIV"]

        for jobGroup in result:
            jobFiles = jobGroup.jobs.pop().getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                goldenFiles = goldenFilesB
            else:
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for file in jobFiles:
                file.loadData()
                assert file["lfn"] in goldenFiles, \
                       "Error: Unknown file in merge jobs."
                assert len(file["locations"]) == 1, \
                       "Error: Wrong number of file locations."
                assert "T2_CH_CERN" in file["locations"], \
                       "Error: File is missing a location."

                goldenFiles.remove(file["lfn"])

                # Check run/lumi/event ordering for every file in the job.
                fileRun = list(file["runs"])[0].run
                fileLumi = min(list(file["runs"])[0])
                fileEvent = file["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                       "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                           "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent > currentEvent, \
                               "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0 and \
               len(goldenFilesC) == 0, \
               "ERROR: Files missing from merge jobs."

        return
Example #54
    def createTestJobGroup(self,
                           nJobs=10,
                           retry_count=1,
                           workloadPath='test',
                           fwjrPath=None,
                           workloadName=makeUUID(),
                           fileModifier=''):
        """
        Creates a group of several jobs
        """

        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath,
                                owner="cmsdataops",
                                group="cmsdataops",
                                name=workloadName,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier,
                         size=1024,
                         events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('T2_CH_CERN')

        testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier,
                         size=1024,
                         events=10,
                         first_event=88,
                         merged=False)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('T2_CH_CERN')

        testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier,
                         size=1024,
                         events=10,
                         first_event=88,
                         merged=False)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('T2_CH_CERN')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
        testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup
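    # A hedged usage sketch (not part of the original helper): a test method
    # on the same TestCase could exercise createTestJobGroup like this; the
    # nJobs and retry_count values are illustrative.
    def testCreateTestJobGroupSketch(self):
        testJobGroup = self.createTestJobGroup(nJobs=5, retry_count=2,
                                               fileModifier='_sketch')
        # Five jobs share the same two unmerged input files, each carrying a
        # run/lumi mask and a cache directory under self.testDir.
        self.assertEqual(len(testJobGroup.jobs), 5)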
Example #55
    def testGetFinishedWorkflows(self):
        """
        _testGetFinishedWorkflows_

        Test that we get only those workflows which are finished, that is,
        workflows where all of their subscriptions are finished and all other
        workflows with the same spec are finished too.

        """

        owner = "no-one"

        #Create a bunch of workflows with "different" specs and tasks
        workflows = []
        for i in range(0, 100):
            scaledIndex = i % 10
            testWorkflow = Workflow(spec="sp00%i" % scaledIndex,
                                    owner=owner,
                                    name="wf00%i" % scaledIndex,
                                    task="task%i" % i)
            testWorkflow.create()
            workflows.append(testWorkflow)

        #Everyone will use this fileset
        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        #Create subscriptions!
        subscriptions = []
        for workflow in workflows:
            subscription = Subscription(fileset=testFileset, workflow=workflow)
            subscription.create()
            subscriptions.append(subscription)

        #Check that all workflows are NOT finished
        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        getFinishedDAO = daoFactory(classname="Workflow.GetFinishedWorkflows")
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 0,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Mark the first 50 subscriptions as finished
        for idx, sub in enumerate(subscriptions):
            if idx > 49:
                break
            sub.markFinished()

        #No workflow is finished yet; none of them has all of its subscriptions completed
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 0,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Now finish all workflows in wf{000-5}
        for idx, sub in enumerate(subscriptions):
            if idx < 50 or idx % 10 > 5:
                continue
            sub.markFinished()

        #Check the workflows
        result = getFinishedDAO.execute()
        self.assertEqual(
            len(result), 6,
            "A workflow is incorrectly flagged as finished: %s" % str(result))

        #Check the overall structure of the workflows
        for wf in result:
            #Sanity checks on the results
            # These are very specific checks that depend heavily on the names of task, spec and workflow
            self.assertEqual(
                wf[2:], result[wf]['spec'][2:],
                "A workflow has the wrong spec-name combination: %s" % str(wf))
            self.assertTrue(
                int(wf[2:]) < 6,
                "A workflow is incorrectly flagged as finished: %s" % str(wf))
            self.assertEqual(
                len(result[wf]['workflows']), 10,
                "A workflow has more tasks than it should: %s" %
                str(result[wf]))
            for task in result[wf]['workflows']:
                self.assertEqual(
                    len(result[wf]['workflows'][task]), 1,
                    "A workflow has more subscriptions than it should: %s" %
                    str(result[wf]))

        return
Example #56
class ParentlessMergeBySizeTest(unittest.TestCase):
    """
    _ParentlessMergeBySizeTest_

    Unit tests for parentless WMBS merging.
    """
    def setUp(self):
        """
        _setUp_

        Boiler plate DB setup.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        return

    def tearDown(self):
        """
        _tearDown_

        Clear out WMBS.
        """
        self.testInit.clearDatabase()
        return

    def stuffWMBS(self):
        """
        _stuffWMBS_

        Insert dummy filesets, files and subscriptions into WMBS to test
        merge job creation.  Files from two runs spanning three lumi
        sections are added to the merge fileset as well as to a bogus
        fileset that the splitter must ignore.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T1_US_FNAL_Disk")

        self.mergeFileset = Fileset(name="mergeFileset")
        self.mergeFileset.create()
        self.bogusFileset = Fileset(name="bogusFileset")
        self.bogusFileset.create()

        mergeWorkflow = Workflow(name="mergeWorkflow",
                                 spec="bunk2",
                                 owner="Steve",
                                 task="Test")
        mergeWorkflow.create()
        markWorkflow = self.daoFactory(
            classname="Workflow.MarkInjectedWorkflows")
        markWorkflow.execute(names=[mergeWorkflow.name], injected=True)

        self.mergeSubscription = Subscription(
            fileset=self.mergeFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")
        self.mergeSubscription.create()
        self.bogusSubscription = Subscription(
            fileset=self.bogusFileset,
            workflow=mergeWorkflow,
            split_algo="ParentlessMergeBySize")

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T1_US_FNAL_Disk"]))
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T1_US_FNAL_Disk"]))
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations=set(["T1_US_FNAL_Disk"]))
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations=set(["T1_US_FNAL_Disk"]))
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations=set(["T1_US_FNAL_Disk"]))
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=102400,
                       first_event=2048,
                       locations=set(["T1_US_FNAL_Disk"]))
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV",
                      size=102400,
                      events=1024,
                      first_event=3072,
                      locations=set(["T1_US_FNAL_Disk"]))
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for jobFile in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV
        ]:
            self.mergeFileset.addFile(jobFile)
            self.bogusFileset.addFile(jobFile)

        self.mergeFileset.commit()
        self.bogusFileset.commit()

        return

    def testMinMergeSize1(self):
        """
        _testMinMergeSize1_

        Set the minimum merge size to be 200,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance.  Verify that no merge jobs
        will be produced.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000000,
                            max_merge_events=200000000)

        assert len(result) == 0, \
            "ERROR: No job groups should be returned."

        return

    def testMinMergeSize1a(self):
        """
        _testMinMergeSize1a_

        Set the minimum merge size to be 200,000 bytes, which is more than the
        sum of all file sizes in the WMBS instance and mark the fileset as
        closed.  Verify that one job containing all files is pushed out.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000,
                            max_merge_events=2000000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % len(result)

        assert len(result[0].jobs) == 1, \
            "Error: One job should have been returned: %s" % len(result[0].jobs)

        self.assertEqual(result[0].jobs[0]["estimatedDiskUsage"], 10 + 2 * 100)

        self.assertEqual(result[0].jobs[0]["possiblePSN"], set(["T1_US_FNAL"]))

        goldenFiles = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII", "fileIV"
        ]

        jobFiles = result[0].jobs[0].getFiles()

        currentRun = 0
        currentLumi = 0
        currentEvent = 0
        for jobFile in jobFiles:
            jobFile.loadData()
            self.assertTrue(jobFile["lfn"] in goldenFiles,
                            "Error: Unknown file: %s" % jobFile["lfn"])
            goldenFiles.remove(jobFile["lfn"])

            fileRun = list(jobFile["runs"])[0].run
            fileLumi = min(list(jobFile["runs"])[0])
            fileEvent = jobFile["first_event"]

            if currentRun == 0:
                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent
                continue

            assert fileRun >= currentRun, \
                "ERROR: Files not sorted by run."

            if fileRun == currentRun:
                assert fileLumi >= currentLumi, \
                    "ERROR: Files not ordered by lumi"

                if fileLumi == currentLumi:
                    assert fileEvent >= currentEvent, \
                        "ERROR: Files not ordered by first event"

            currentRun = fileRun
            currentLumi = fileLumi
            currentEvent = fileEvent

        return

    def testMaxMergeSize(self):
        """
        _testMaxMergeSize_

        Set the maximum merge size to be 100000 bytes.  Verify that two merge
        jobs are created, one for the one large file and another for the rest of
        the files.  Verify that each merge job contains the expected files and
        that we merge across runs.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=100000,
                            max_merge_events=200000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % result

        assert len(result[0].jobs) == 2, \
            "ERROR: Two jobs should have been returned."

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIII"
        ]
        goldenFilesB = ["fileIV"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 11)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 2 * 100)
                goldenFiles = goldenFilesB

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file: %s" % jobFile["lfn"])

                goldenFiles.remove(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0, \
            "ERROR: Files missing from merge jobs."

        return

    def testMaxEvents(self):
        """
        _testMaxEvents_

        Verify that the max_merge_events parameter works and that we correctly
        merge across runs.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=20000000,
                            max_merge_events=100000)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % result

        assert len(result[0].jobs) == 2, \
            "ERROR: Two jobs should have been returned: %s" % len(result[0].jobs)

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC",
            "fileI", "fileII", "fileIV"
        ]
        goldenFilesB = ["fileIII"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 9 + 2 * 100)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 2)
                goldenFiles = goldenFilesB

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file: %s" % jobFile["lfn"])

                goldenFiles.remove(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run: %s, %s" % (fileRun, currentRun)

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        assert len(goldenFilesA) == 0 and len(goldenFilesB) == 0, \
               "ERROR: Files missing from merge jobs."

        return

    def testMinMergeSize1aNoRunMerge(self):
        """
        _testMinMergeSize1aNoRunMerge_

        Set the minimum merge size to be 200,000 bytes, which is more than
        the sum of all file sizes in the WMBS instance, and mark the fileset
        as closed.  Verify that two jobs are pushed out and that we don't
        merge across run boundaries.
        """
        self.stuffWMBS()
        self.mergeFileset.markOpen(False)

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000,
                            max_merge_events=2000000,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % len(result)

        assert len(result[0].jobs) == 2, \
            "Error: Two jobs should have been returned: %s" % len(result[0].jobs)

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ]
        goldenFilesB = ["fileI", "fileII", "fileIII", "fileIV"]
        goldenFilesA.sort()
        goldenFilesB.sort()

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            jobLFNs = []

            for jobFile in job.getFiles():
                jobFile.loadData()
                jobLFNs.append(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                assert fileRun >= currentRun, \
                    "ERROR: Files not sorted by run."

                if fileRun == currentRun:
                    assert fileLumi >= currentLumi, \
                        "ERROR: Files not ordered by lumi"

                    if fileLumi == currentLumi:
                        assert fileEvent >= currentEvent, \
                            "ERROR: Files not ordered by first event"

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

            jobLFNs.sort()
            if jobLFNs == goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 8)
                goldenFilesA = []
            else:
                self.assertEqual(job["estimatedDiskUsage"], 3 + 2 * 100)
                self.assertEqual(jobLFNs, goldenFilesB,
                                 "Error: LFNs do not match.")
                goldenFilesB = []

        return

    def testMaxMergeSizeNoRunMerge(self):
        """
        _testMaxMergeSizeNoRunMerge_

        Set the maximum merge size to be 100000 bytes.  Verify that two merge
        jobs are created, one for the one large file and another for the rest of
        the files.  Verify that each merge job contains the expected files and
        that we don't merge across run boundaries.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=100000,
                            max_merge_events=200000,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned: %s" % result

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        goldenFilesA = [
            "file1", "file2", "file3", "file4", "fileA", "fileB", "fileC"
        ]
        goldenFilesB = ["fileI", "fileII", "fileIII"]
        goldenFilesC = ["fileIV"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 8)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 4)
                goldenFiles = goldenFilesB
            else:
                self.assertEqual(job["estimatedDiskUsage"], 2 * 100)
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file: %s" % jobFile["lfn"])

                goldenFiles.remove(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(fileRun >= currentRun,
                                "ERROR: Files not sorted by run.")
                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")
                    if fileLumi == currentLumi:
                        self.assertTrue(fileEvent >= currentEvent,
                                        "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        self.assertTrue(
            len(goldenFilesA) == 0 and len(goldenFilesB) == 0
            and len(goldenFilesC) == 0,
            "ERROR: Files missing from merge jobs.")

        return

    def testMaxEventsNoRunMerge(self):
        """
        _testMaxEventsNoRunMerge_

        Verify that the max events merge parameter works correctly and that we
        don't merge across run boundaries.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=1,
                            max_merge_size=20000000,
                            max_merge_events=100000,
                            merge_across_runs=False)

        self.assertTrue(
            len(result) == 1,
            "ERROR: More than one JobGroup returned: %s" % result)

        self.assertTrue(
            len(result[0].jobs) == 3,
            "ERROR: Three jobs should have been returned: %s" %
            len(result[0].jobs))

        goldenFilesA = [
            "file1",
            "file2",
            "file3",
            "file4",
            "fileA",
            "fileB",
            "fileC",
        ]
        goldenFilesB = ["fileI", "fileII", "fileIV"]
        goldenFilesC = ["fileIII"]

        for job in result[0].jobs:

            self.assertEqual(job["possiblePSN"], set(["T1_US_FNAL"]))

            jobFiles = job.getFiles()

            if jobFiles[0]["lfn"] in goldenFilesA:
                self.assertEqual(job["estimatedDiskUsage"], 8)
                goldenFiles = goldenFilesA
            elif jobFiles[0]["lfn"] in goldenFilesB:
                self.assertEqual(job["estimatedDiskUsage"], 2 + 2 * 100)
                goldenFiles = goldenFilesB
            else:
                self.assertEqual(job["estimatedDiskUsage"], 2 * 1)
                goldenFiles = goldenFilesC

            currentRun = 0
            currentLumi = 0
            currentEvent = 0
            for jobFile in jobFiles:
                self.assertTrue(jobFile["lfn"] in goldenFiles,
                                "Error: Unknown file: %s" % jobFile["lfn"])

                goldenFiles.remove(jobFile["lfn"])

                fileRun = list(jobFile["runs"])[0].run
                fileLumi = min(list(jobFile["runs"])[0])
                fileEvent = jobFile["first_event"]

                if currentRun == 0:
                    currentRun = fileRun
                    currentLumi = fileLumi
                    currentEvent = fileEvent
                    continue

                self.assertTrue(
                    fileRun >= currentRun,
                    "ERROR: Files not sorted by run: %s, %s" %
                    (fileRun, currentRun))
                if fileRun == currentRun:
                    self.assertTrue(fileLumi >= currentLumi,
                                    "ERROR: Files not ordered by lumi")
                    if fileLumi == currentLumi:
                        self.assertTrue(
                            fileEvent >= currentEvent,
                            "ERROR: Files not ordered by first event")

                currentRun = fileRun
                currentLumi = fileLumi
                currentEvent = fileEvent

        self.assertTrue(
            len(goldenFilesA) == 0 and len(goldenFilesB) == 0
            and len(goldenFilesC) == 0,
            "ERROR: Files missing from merge jobs.")

        return

    def testLocationMerging(self):
        """
        _testLocationMerging_

        Verify that files residing at different locations (PNNs) are not
        merged together in the same job.
        """
        self.stuffWMBS()

        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T1_UK_RAL", pnn="T1_UK_RAL_Disk")

        fileSite2 = File(lfn="fileRAL",
                         size=4098,
                         events=1024,
                         first_event=0,
                         locations=set(["T1_UK_RAL_Disk"]))
        fileSite2.addRun(Run(1, *[46]))
        fileSite2.create()

        self.mergeFileset.addFile(fileSite2)
        self.mergeFileset.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)

        assert len(result) == 1, \
            "ERROR: More than one JobGroup returned."

        assert len(result[0].jobs) == 3, \
            "ERROR: Three jobs should have been returned."

        ralJobs = 0
        fnalJobs = 0
        for job in result[0].jobs:
            if job["possiblePSN"] == set(["T1_UK_RAL"]):
                ralJobs += 1
            elif job["possiblePSN"] == set(["T1_US_FNAL"]):
                fnalJobs += 1

        self.assertEqual(ralJobs, 1)
        self.assertEqual(fnalJobs, 2)

        return
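    # Merge splitting never combines files that live at different storage
    # locations, which is why the RAL file above ends up in its own job.  A
    # minimal sketch of that grouping step (illustrative only, not the
    # ParentlessMergeBySize implementation):
    def groupFilesByLocation(files):
        groups = {}
        for wmbsFile in files:
            key = frozenset(wmbsFile["locations"])
            groups.setdefault(key, []).append(wmbsFile)
        return groups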

    def testMaxWaitTime(self):
        """
        _testMaxWaitTime_

        Set the max wait time to a negative value - this should force all
        files to be merged out immediately.

        Uses the same setup as the first merge test, which would normally
        produce no job groups.
        """
        self.stuffWMBS()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)

        result = jobFactory(min_merge_size=200000,
                            max_merge_size=2000000000,
                            max_merge_events=200000000,
                            max_wait_time=-10)

        # Everything should be in one, small jobGroup
        self.assertEqual(len(result), 1)
        self.assertEqual(len(result[0].jobs), 1)
        job = result[0].jobs[0]
        # All files should be in one job
        self.assertEqual(len(job.getFiles()), 11)

        return
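    # With a negative max_wait_time every file is already "too old", so the
    # splitter merges everything it has instead of waiting for min_merge_size
    # to be reached.  A sketch of that decision as a hypothetical helper (the
    # real splitter's timing logic may differ):
    def shouldForceMerge(groupSize, minMergeSize, oldestFileTime, maxWaitTime):
        import time
        if groupSize >= minMergeSize:
            return True
        # A negative maxWaitTime makes this condition true immediately.
        return time.time() - oldestFileTime > maxWaitTime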

    def testDifferentSubscriptionIDs(self):
        """
        _testDifferentSubscriptionIDs_

        Make sure that the merge splitting still runs if the subscription ID
        is not equal to the workflow ID.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        dummyWorkflow = Workflow(name="dummyWorkflow",
                                 spec="bunk49",
                                 owner="Steve",
                                 task="Test2")
        dummyWorkflow.create()
        dummyFileset = Fileset(name="dummyFileset")
        dummyFileset.create()
        dummySubscription1 = Subscription(fileset=dummyFileset,
                                          workflow=dummyWorkflow,
                                          split_algo="ParentlessMergeBySize")
        dummySubscription2 = Subscription(fileset=dummyFileset,
                                          workflow=dummyWorkflow,
                                          split_algo="ParentlessMergeBySize")
        dummySubscription1.create()
        dummySubscription2.create()
        myThread.transaction.commit()

        self.stuffWMBS()
        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=self.mergeSubscription)
        result = jobFactory(min_merge_size=4097,
                            max_merge_size=99999999,
                            max_merge_events=999999999,
                            merge_across_runs=False)
        self.assertEqual(len(result), 1)
        jobGroup = result[0]
        self.assertEqual(len(jobGroup.jobs), 2)
        return
Example #57
0
    def injectJobs(self):
        """
        _injectJobs_

        Inject two workflows into WMBS and save the job objects to disk.
        """
        testWorkflowA = Workflow(spec="specA.pkl",
                                 owner="Steve",
                                 name="wf001",
                                 task="TestTaskA")
        testWorkflowA.create()
        testWorkflowB = Workflow(spec="specB.pkl",
                                 owner="Steve",
                                 name="wf002",
                                 task="TestTaskB")
        testWorkflowB.create()

        testFileset = Fileset("testFileset")
        testFileset.create()

        testSubA = Subscription(fileset=testFileset, workflow=testWorkflowA)
        testSubA.create()
        testSubB = Subscription(fileset=testFileset, workflow=testWorkflowB)
        testSubB.create()

        testGroupA = JobGroup(subscription=testSubA)
        testGroupA.create()
        testGroupB = JobGroup(subscription=testSubB)
        testGroupB.create()

        stateChanger = ChangeState(self.createConfig(),
                                   "jobsubmittercaching_t")

        for i in range(10):
            newFile = File(lfn="testFile%s" % i,
                           locations=set(["se.T1_US_FNAL", "se.T1_UK_RAL"]))
            newFile.create()

            newJobA = Job(name="testJobA-%s" % i, files=[newFile])
            newJobA["workflow"] = "wf001"
            newJobA["possiblePSN"] = ["T1_US_FNAL"]
            newJobA["sandbox"] = "%s/somesandbox" % self.testDir
            newJobA["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir, "jobA-%s" % i)
            os.mkdir(jobCacheDir)
            newJobA["cache_dir"] = jobCacheDir
            newJobA["type"] = "Processing"
            newJobA.create(testGroupA)

            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJobA, jobHandle)

            stateChanger.propagate([newJobA], "created", "new")

            newJobB = Job(name="testJobB-%s" % i, files=[newFile])
            newJobB["workflow"] = "wf001"
            newJobB["possiblePSN"] = ["T1_UK_RAL"]
            newJobB["sandbox"] = "%s/somesandbox" % self.testDir
            newJobB["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir, "jobB-%s" % i)
            os.mkdir(jobCacheDir)
            newJobB["cache_dir"] = jobCacheDir
            newJobB["type"] = "Processing"
            newJobB.create(testGroupB)

            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJobB, jobHandle)

            stateChanger.propagate([newJobB], "created", "new")

        return
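    # Each job is pickled into its own cache directory so the submitter can
    # reload it later without hitting the database.  A minimal sketch of that
    # save/load round trip (pickle requires binary file modes):
    def saveJobToCache(job, cacheDir):
        import os
        import pickle
        pklPath = os.path.join(cacheDir, "job.pkl")
        with open(pklPath, "wb") as jobHandle:
            pickle.dump(job, jobHandle)
        return pklPath

    def loadJobFromCache(cacheDir):
        import os
        import pickle
        with open(os.path.join(cacheDir, "job.pkl"), "rb") as jobHandle:
            return pickle.load(jobHandle)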
Example #58
0
    def testC_ACDCTest(self):
        """
        _ACDCTest_

        Test whether we can get a goodRunList out of ACDC
        and process it correctly.
        """
        workload = self.createTestWorkload()
        dcs = DataCollectionService(url=self.testInit.couchUrl, database=self.testInit.couchDbName)

        testFileA = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileA.addRun(Run(1, 1, 2))
        testFileA.create()
        testFileB = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileB.addRun(Run(1, 3))
        testFileB.create()
        testJobA = getJob(workload)
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileC.addRun(Run(1, 4, 6))
        testFileC.create()
        testJobB = getJob(workload)
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileD.addRun(Run(1, 7))
        testFileD.create()
        testJobC = getJob(workload)
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileE.addRun(Run(1, 11, 12))
        testFileE.create()
        testJobD = getJob(workload)
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileF.addRun(Run(2, 5, 6, 7))
        testFileF.create()
        testJobE = getJob(workload)
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileG.addRun(Run(2, 10, 11, 12))
        testFileG.create()
        testJobF = getJob(workload)
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileH.addRun(Run(2, 15))
        testFileH.create()
        testJobG = getJob(workload)
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileI.addRun(Run(3, 20))
        testFileI.create()
        testJobH = getJob(workload)
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(), size=1024, events=1024, locations="T1_US_FNAL_Disk")
        testFileJ.addRun(Run(1, 9))
        testFileJ.create()
        testJobI = getJob(workload)
        testJobI.addFile(testFileJ)

        # dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
        #                testJobF, testJobG, testJobH, testJobI])

        dcs.failedJobs([testJobA, testJobD, testJobH])

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.addFile(testFileH)
        testFileset.addFile(testFileI)
        testFileset.addFile(testFileJ)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(lumis_per_job=100,
                               halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               collectionName=workload.name(),
                               filesetName=workload.getTask("reco").getPathName(),
                               owner="evansde77",
                               group="DMWM",
                               couchURL=self.testInit.couchUrl,
                               couchDB=self.testInit.couchDbName,
                               performance=self.performanceParams)

        self.assertEqual(jobGroups[0].jobs[0]['mask'].getRunAndLumis(), {1: [[1, 2], [3, 3], [11, 12]]})
        self.assertEqual(jobGroups[0].jobs[1]['mask'].getRunAndLumis(), {3: [[20, 20]]})

        return
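    # The mask returned by getRunAndLumis() stores lumis as inclusive
    # [start, end] ranges per run, e.g. {1: [[1, 2], [3, 3], [11, 12]]}.  A
    # sketch of collapsing a lumi list into such ranges (note the actual mask
    # above keeps ranges from different files separate, which is why [1, 2]
    # and [3, 3] are not fused into [1, 3]):
    def lumisToRanges(lumis):
        ranges = []
        for lumi in sorted(lumis):
            if ranges and lumi == ranges[-1][1] + 1:
                ranges[-1][1] = lumi
            else:
                ranges.append([lumi, lumi])
        return ranges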
Example #59
0
    def testParallelProcessing(self):
        """
        _testParallelProcessing_

        Verify that merging works correctly when multiple processing
        subscriptions are run over the same input files.  The merging algorithm
        should ignore processing jobs that feed into different merge
        subscriptions.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="T2_CH_CERN", pnn="T2_CH_CERN")
        locationAction.execute(siteName="T1_US_FNAL", pnn="T2_CH_CERN")

        mergeFilesetA = Fileset(name="mergeFilesetA")
        mergeFilesetB = Fileset(name="mergeFilesetB")
        mergeFilesetA.create()
        mergeFilesetB.create()

        mergeMergedFilesetA = Fileset(name="mergeMergedFilesetA")
        mergeMergedFilesetB = Fileset(name="mergeMergedFilesetB")
        mergeMergedFilesetA.create()
        mergeMergedFilesetB.create()

        mergeWorkflow = Workflow(name="mergeWorkflow", spec="bogus",
                                 owner="Steve", task="Test")
        mergeWorkflow.create()

        mergeSubscriptionA = Subscription(fileset=mergeFilesetA,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionB = Subscription(fileset=mergeFilesetB,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize")
        mergeSubscriptionA.create()
        mergeSubscriptionB.create()

        inputFileset = Fileset(name="inputFileset")
        inputFileset.create()

        inputFileA = File(lfn="inputLFNA")
        inputFileB = File(lfn="inputLFNB")
        inputFileA.create()
        inputFileB.create()

        procWorkflowA = Workflow(name="procWorkflowA", spec="bunk2",
                                 owner="Steve", task="Test")
        procWorkflowA.create()
        procWorkflowA.addOutput("output", mergeFilesetA, mergeMergedFilesetA)
        procWorkflowB = Workflow(name="procWorkflowB", spec="bunk3",
                                 owner="Steve", task="Test2")
        procWorkflowB.create()
        procWorkflowB.addOutput("output", mergeFilesetB, mergeMergedFilesetB)

        procSubscriptionA = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowA,
                                         split_algo="EventBased")
        procSubscriptionA.create()
        procSubscriptionB = Subscription(fileset=inputFileset,
                                         workflow=procWorkflowB,
                                         split_algo="EventBased")
        procSubscriptionB.create()

        jobGroupA = JobGroup(subscription=procSubscriptionA)
        jobGroupA.create()
        jobGroupB = JobGroup(subscription=procSubscriptionB)
        jobGroupB.create()

        changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

        testJobA = Job()
        testJobA.addFile(inputFileA)
        testJobA.create(jobGroupA)
        testJobA["state"] = "cleanout"
        testJobA["oldstate"] = "new"
        testJobA["couch_record"] = "somejive"
        testJobA["retry_count"] = 0
        testJobA["outcome"] = "success"
        testJobA.save()

        testJobB = Job()
        testJobB.addFile(inputFileB)
        testJobB.create(jobGroupA)
        testJobB["state"] = "cleanout"
        testJobB["oldstate"] = "new"
        testJobB["couch_record"] = "somejive"
        testJobB["retry_count"] = 0
        testJobB["outcome"] = "success"
        testJobB.save()

        testJobC = Job()
        testJobC.addFile(inputFileA)
        testJobC.create(jobGroupB)
        testJobC["state"] = "cleanout"
        testJobC["oldstate"] = "new"
        testJobC["couch_record"] = "somejive"
        testJobC["retry_count"] = 0
        testJobC["outcome"] = "success"
        testJobC.save()

        testJobD = Job()
        testJobD.addFile(inputFileA)
        testJobD.create(jobGroupB)
        testJobD["state"] = "cleanout"
        testJobD["oldstate"] = "new"
        testJobD["couch_record"] = "somejive"
        testJobD["retry_count"] = 0
        testJobD["outcome"] = "failure"
        testJobD.save()

        testJobE = Job()
        testJobE.addFile(inputFileB)
        testJobE.create(jobGroupB)
        testJobE["state"] = "cleanout"
        testJobE["oldstate"] = "new"
        testJobE["couch_record"] = "somejive"
        testJobE["retry_count"] = 0
        testJobE["outcome"] = "success"
        testJobE.save()

        testJobF = Job()
        testJobF.addFile(inputFileB)
        testJobF.create(jobGroupB)
        testJobF["state"] = "cleanout"
        testJobF["oldstate"] = "new"
        testJobF["couch_record"] = "somejive"
        testJobF["retry_count"] = 0
        testJobF["outcome"] = "failure"
        testJobF.save()

        changeStateDAO.execute([testJobA, testJobB, testJobC, testJobD,
                                testJobE, testJobF])

        fileA = File(lfn="fileA", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileA.addRun(Run(1, *[45]))
        fileA.create()
        fileA.addParent(inputFileA["lfn"])
        fileB = File(lfn="fileB", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileB.addRun(Run(1, *[45]))
        fileB.create()
        fileB.addParent(inputFileB["lfn"])

        jobGroupA.output.addFile(fileA)
        jobGroupA.output.addFile(fileB)
        jobGroupA.output.commit()

        mergeFilesetA.addFile(fileA)
        mergeFilesetA.addFile(fileB)
        mergeFilesetA.commit()

        fileC = File(lfn="fileC", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileC.addRun(Run(1, *[45]))
        fileC.create()
        fileC.addParent(inputFileA["lfn"])
        fileD = File(lfn="fileD", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileD.addRun(Run(1, *[45]))
        fileD.create()
        fileD.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileC)
        jobGroupB.output.addFile(fileD)

        mergeFilesetB.addFile(fileC)
        mergeFilesetB.addFile(fileD)
        mergeFilesetB.commit()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=mergeSubscriptionB)

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 0, \
            "Error: No merge jobs should have been created."

        fileE = File(lfn="fileE", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileE.addRun(Run(1, *[45]))
        fileE.create()
        fileE.addParent(inputFileA["lfn"])
        fileF = File(lfn="fileF", size=1024, events=1024, first_event=0,
                     locations={"T2_CH_CERN"})
        fileF.addRun(Run(1, *[45]))
        fileF.create()
        fileF.addParent(inputFileB["lfn"])

        jobGroupB.output.addFile(fileE)
        jobGroupB.output.addFile(fileF)

        mergeFilesetB.addFile(fileE)
        mergeFilesetB.addFile(fileF)
        mergeFilesetB.commit()

        testJobD["outcome"] = "success"
        testJobD.save()
        testJobF["outcome"] = "success"
        testJobF.save()

        changeStateDAO.execute([testJobD, testJobF])

        result = jobFactory(min_merge_size=1, max_merge_size=20000,
                            max_merge_events=7169)

        assert len(result) == 1, \
            "Error: One merge job should have been created: %s" % len(result)

        return
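    # A merge job is only created once every processing job feeding the merge
    # fileset has finished successfully; the failed feeder jobs above hold the
    # whole group back until their outcomes flip to success.  An illustrative
    # sketch of that gate (not the WMBSMergeBySize implementation):
    def canMerge(feederJobs):
        return all(job["state"] == "cleanout" and job["outcome"] == "success"
                   for job in feederJobs)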
Example #60
0
    def testOutput(self):
        """
        _testOutput_

        Create a workflow and add some outputs to it.  Verify that these are
        stored to and loaded from the database correctly.
        """
        testFilesetA = Fileset(name="testFilesetA")
        testMergedFilesetA = Fileset(name="testMergedFilesetA")
        testFilesetB = Fileset(name="testFilesetB")
        testMergedFilesetB = Fileset(name="testMergedFilesetB")
        testFilesetC = Fileset(name="testFilesetC")
        testMergedFilesetC = Fileset(name="testMergedFilesetC")
        testFilesetA.create()
        testFilesetB.create()
        testFilesetC.create()
        testMergedFilesetA.create()
        testMergedFilesetB.create()
        testMergedFilesetC.create()

        testWorkflowA = Workflow(spec="spec.xml",
                                 owner="Simon",
                                 name="wf001",
                                 task='Test')
        testWorkflowA.create()

        testWorkflowB = Workflow(name="wf001", task='Test')
        testWorkflowB.load()

        self.assertEqual(len(testWorkflowB.outputMap.keys()), 0,
                         "ERROR: Output map exists before output is assigned")

        testWorkflowA.addOutput("outModOne", testFilesetA, testMergedFilesetA)
        testWorkflowA.addOutput("outModOne", testFilesetC, testMergedFilesetC)
        testWorkflowA.addOutput("outModTwo", testFilesetB, testMergedFilesetB)

        testWorkflowC = Workflow(name="wf001", task='Test')
        testWorkflowC.load()

        self.assertEqual(len(testWorkflowC.outputMap.keys()), 2,
                         "ERROR: Incorrect number of outputs in output map")
        self.assertTrue(
            "outModOne" in testWorkflowC.outputMap.keys(),
            "ERROR: Output modules missing from workflow output map")
        self.assertTrue(
            "outModTwo" in testWorkflowC.outputMap.keys(),
            "ERROR: Output modules missing from workflow output map")

        for outputMap in testWorkflowC.outputMap["outModOne"]:
            if outputMap["output_fileset"].id == testFilesetA.id:
                self.assertEqual(
                    outputMap["merged_output_fileset"].id,
                    testMergedFilesetA.id,
                    "Error: Output map incorrectly maps filesets.")
            else:
                self.assertEqual(
                    outputMap["merged_output_fileset"].id,
                    testMergedFilesetC.id,
                    "Error: Output map incorrectly maps filesets.")
                self.assertEqual(
                    outputMap["output_fileset"].id, testFilesetC.id,
                    "Error: Output map incorrectly maps filesets.")

        self.assertEqual(
            testWorkflowC.outputMap["outModTwo"][0]
            ["merged_output_fileset"].id, testMergedFilesetB.id,
            "Error: Output map incorrectly maps filesets.")
        self.assertEqual(
            testWorkflowC.outputMap["outModTwo"][0]["output_fileset"].id,
            testFilesetB.id, "Error: Output map incorrectly maps filesets.")
        return
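    # A workflow's output map is keyed by output module name, and each module
    # maps to a list of fileset pairs, so a single module can feed several
    # output/merged fileset combinations.  A plain-dict sketch of the
    # structure the assertions above walk through (the real map holds Fileset
    # objects rather than names):
    outputMap = {
        "outModOne": [
            {"output_fileset": "testFilesetA",
             "merged_output_fileset": "testMergedFilesetA"},
            {"output_fileset": "testFilesetC",
             "merged_output_fileset": "testMergedFilesetC"},
        ],
        "outModTwo": [
            {"output_fileset": "testFilesetB",
             "merged_output_fileset": "testMergedFilesetB"},
        ],
    }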