    def setUp(self):
        """
        _setUp_

        Install the DBSBuffer schema into the database and connect to PhEDEx.
        """
        self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase = True)

        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "srm-cms.cern.ch")
        locationAction.execute(siteName = "se.fnal.gov")

        self.testFilesA = []
        self.testFilesB = []
        self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
        self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()
        self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")

        return
Example #2
    def createTestJob(subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """

        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
Example #3
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                      "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return
Example #4
    def testLotsOfAncestors(self):
        """
        _testLotsOfAncestors_

        Create a file with 15 parents, each of which has 100 parents, to
        verify that the query that returns grandparents works correctly.
        """
        raise nose.SkipTest
        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10,
                         checksums={"cksum": "1"}, locations="se1.fnal.gov")
        testFileA.create()

        for i in xrange(15):
            testParent = File(lfn=makeUUID(), size=1024, events=10,
                              checksums={"cksum": "1"}, locations="se1.fnal.gov")
            testParent.create()
            testFileA.addParent(testParent["lfn"])

            for _ in xrange(100):
                testGParent = File(lfn=makeUUID(), size=1024, events=10,
                                   checksums={"cksum": "1"}, locations="se1.fnal.gov")
                testGParent.create()
                testParent.addParent(testGParent["lfn"])

        assert len(testFileA.getAncestors(level=2, type="lfn")) == 1500, \
            "ERROR: Incorrect grand parents returned"

        return
Example #5
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
Example #6
    def testTime(self):

        nUIDs     = 100000
        startTime = time.clock()
        for i in range(0,nUIDs):
            makeUUID()
        print("We can make %i UUIDs in %f seconds" %(nUIDs, time.clock() - startTime))
Example #7
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None, wfPrio=1, changeState=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        changeState is an instance of the ChangeState class to make job status changes
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=wfPrio)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):
            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        if changeState:
            for group in jobGroupList:
                changeState.propagate(group.jobs, 'created', 'new')

        return jobGroupList
Example #8
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(i, *[45+i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(1, *[45+i/3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)


        testWorkflow = Workflow()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "FixedDelay",
                                                      type = "Processing")


        return
Example #9
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testJobA = Job(name = makeUUID(), files = [])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name = makeUUID(), files = [])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group = testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name = makeUUID(), files = [])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroup)
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."

        return
Example #10
def createFile():
    """
    _createFile_

    Create a file with some random metadata.
    """
    newFile = File(lfn = makeUUID(), size = random.randrange(1024, 1048576, 1024),
                   events = random.randrange(10, 100000, 50),
                   parents = [File(lfn = makeUUID())],
                   locations = makeUUID())
    newFile["first_event"] = 0
    newFile["last_event"] = 0
    newFile["id"] = 1
    return newFile
Example #11
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site,
                        taskType='Processing', name=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec, owner="tapas",
                                name=name, task="basicWorkload/Production",
                                priority=1)
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name, task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #12
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """
        import cProfile
        import pstats

        name = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name=name, config=config,
                                         nSubs=10, nJobs=1000, nFiles=10)

        cleanCouch = CleanCouchPoller(config=config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return
Example #13
    def getBlock(self, newFile, location, skipOpenCheck=False):
        """
        _getBlock_

        Retrieve a block if one exists with a matching datasetpath/location and is open.
        If no such block is found, create and return a new one.
        """
        datasetpath = newFile["datasetPath"]

        for block in self.blockCache.values():
            if datasetpath == block.getDatasetPath() and location == block.getLocation():
                if not self.isBlockOpen(newFile=newFile, block=block) and not skipOpenCheck:
                    # Block isn't open anymore.  Mark it as pending so that it gets uploaded.
                    block.setPendingAndCloseBlock()
                else:
                    return block

        # A suitable open block does not exist.  Create a new one.
        blockname = "%s#%s" % (datasetpath, makeUUID())
        newBlock = DBSBufferBlock(name=blockname,
                                  location=location,
                                  datasetpath=datasetpath)

        parent = self.datasetParentageCache.get(datasetpath)
        if parent:
            newBlock.addDatasetParent(parent)
            logging.debug("Get block: Child dataset %s, Parent dataset %s", datasetpath, parent)

        self.blockCache[blockname] = newBlock
        return newBlock
Example #14
    def testTestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Test and see if we can split things without a proxy.
        """

        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name = makeUUID()
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)

        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.stuffWMBS(workflowURL=workloadPath, name=name)

        testJobCreator = JobCreatorPoller(config=config)

        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), 1)

        result = getJobsAction.execute(state='Created', jobType="Merge")
        self.assertEqual(len(result), 0)

        return
Example #15
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        return testSubscription
Example #16
File: Express.py Project: dmwm/T0
    def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization (twice)
        #   - 0.5MB/s repack speed
        #   - reco with timePerEvent
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
        jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                             disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                             memory = memoryRequirement)

        return
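For reference, here is a minimal standalone sketch of the resource arithmetic used above; the 5 GB size, 50k events and the timePerEvent/sizePerEvent figures are hypothetical numbers chosen only for illustration, not values from the T0 configuration.

# Standalone sketch of the createJob resource estimate above (hypothetical inputs).
jobSize = 5 * 10**9       # bytes of streamer input (assumed)
jobEvents = 50000         # events in the job (assumed)
timePerEvent = 2.0        # seconds of reco per event (assumed)
sizePerEvent = 1500       # per-event output size estimate (assumed)

jobTime = min(600 + jobSize / 500000 + jobEvents * timePerEvent
              + (jobEvents * sizePerEvent * 2) / 5000000, 47 * 3600)
disk = min(jobSize / 1024 + jobEvents * sizePerEvent, 20000000)

print("estimated wall time: %.0f s, estimated disk: %.0f" % (jobTime, disk))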
Example #17
    def __call__(self):
        """
        __call__

        Generate some random data
        """

        # Generate somewhere between ten and two thousand files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = random.randint(10, 2000)
        name = name.replace('-', '_')
        name = '%s-v0' % name
        files = self.getFiles(name = name, nFiles = nFiles)

        print("Inserting %i files for dataset %s" % (nFiles * 2, name))

        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

        # Repeat just to make sure
        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise


        return
Example #18
    def testD_Timing(self):
        """
        _Timing_

        This is to see how fast things go.
        """
        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()
        jobList = self.createGiantJobSet(name=name, config=config, nSubs=10,
                                         nJobs=1000, nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        startTime = time.time()
        testTaskArchiver.algorithm()
        stopTime = time.time()

        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)
        testWMBSFileset = Fileset(id=1)
        self.assertEqual(testWMBSFileset.exists(), False)

        logging.info("TaskArchiver took %f seconds", (stopTime - startTime))
Example #19
    def testProfilePoller(self):
        """
        Profile your performance
        You shouldn't be running this normally because it doesn't do anything
        """

        name = makeUUID()
        nSubs = 5
        nFiles = 1500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        testJobCreator = JobCreatorPoller(config=config)
        cProfile.runctx("testJobCreator.algorithm()", globals(), locals(), filename="testStats.stat")

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        time.sleep(10)

        self.assertEqual(len(result), nSubs * nFiles)

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return
Example #20
    def testProfileWorker(self):
        """
        Profile where the work actually gets done
        You shouldn't be running this one either, since it doesn't test anything.
        """

        name = makeUUID()
        nSubs = 5
        nFiles = 500
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.createJobCollection(name=name, nSubs=nSubs, nFiles=nFiles, workflowURL=workloadPath)

        config = self.getConfig()

        configDict = {"couchURL": config.JobStateMachine.couchurl,
                      "couchDBName": config.JobStateMachine.couchDBName,
                      'jobCacheDir': config.JobCreator.jobCacheDir,
                      'defaultJobType': config.JobCreator.defaultJobType}

        subs = [{"subscription": 1}, {"subscription": 2}, {"subscription": 3}, {"subscription": 4},
                {"subscription": 5}]

        testJobCreator = JobCreatorPoller(**configDict)
        cProfile.runctx("testJobCreator.algorithm(parameters = input)", globals(), locals(), filename="workStats.stat")

        p = pstats.Stats('workStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(.2)

        return
Example #21
    def create(self, group):
        """
        _create_

        Write the job to the database.
        """
        if self["id"] is not None:
            return

        existingTransaction = self.beginTransaction()

        self["jobgroup"] = group.id

        if self["name"] is None:
            self["name"] = makeUUID()

        jobAction = self.daofactory(classname="Jobs.New")
        jobAction.execute(jobgroup=self["jobgroup"], name=self["name"],
                          couch_record=self["couch_record"],
                          location=self["location"], cache_dir=self['cache_dir'],
                          outcome=self['outcome'], fwjr=self['fwjr'],
                          conn=self.getDBConn(),
                          transaction=self.existingTransaction())

        self.exists()

        self['mask'].save(jobID=self['id'])

        self.associateFiles()
        self.associateWorkUnits()
        self.commitTransaction(existingTransaction)
        return
Example #22
    def test05(self):
        """
        _test05_

        Test multi-lumi express merges with holes

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #23
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")


        return
Example #24
    def testFileset(self):
        """
        _testFileset_

        Verify that converting an ACDC fileset to a DataStructs fileset works
        correctly.
        """
        testCollection = CouchCollection(database=self.testInit.couchDbName,
                                         url=self.testInit.couchUrl,
                                         name="Thunderstruck")
        testFileset = CouchFileset(database=self.testInit.couchDbName,
                                   url=self.testInit.couchUrl,
                                   name="TestFileset")
        testCollection.addFileset(testFileset)

        testFiles = {}
        for i in range(5):
            lfn = makeUUID()
            testFile = File(lfn=lfn, size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFiles[lfn] = testFile
            testFileset.add([testFile])

        for file in testFileset.fileset().files:
            self.assertTrue(file["lfn"] in testFiles.keys(),
                            "Error: File missing.")
            self.assertEqual(file["events"], testFiles[file["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(file["size"], testFiles[file["lfn"]]["size"],
                             "Error: Wrong file size.")
        return
Example #25
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Implement merge algorithm for the subscription provided

        """
        fileset = list(self.subscription.availableFiles())

        mergeSize = int(kwargs['merge_size'])
        overflow  = bool(kwargs.get('all_files', False))
        fileset.sort()

        accumSize = 0
        jobFiles = Fileset()
        locationDict = self.sortByLocation()
        for location in locationDict:
            baseName = makeUUID()
            self.newGroup()
            for f in locationDict[location]:
                accumSize += f['size']
                jobFiles.addFile(f)
                if accumSize >= mergeSize:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                    accumSize = 0
                    jobFiles = Fileset()

            if len(jobFiles) > 0:
                if overflow:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
Example #26
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T1_US_FNAL_Disk')
            newFile.create()
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'T2_CH_CERN')
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
Example #27
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        A time-delay job splitting algorithm that will shove all unacquired
        files into a new job once the trigger_time has passed.
        """

        #  //
        # // get the fileset
        #//
        fileset = self.subscription.getFileset()
        trigger_time = int(kwargs['trigger_time'])
        if (trigger_time < time.time()):
            availFiles = self.subscription.availableFiles()
            if (len(availFiles) == 0):
                # no files to acquire
                return []

            baseName = makeUUID()
            self.newGroup()
            self.newJob(name = '%s-endofrun' % (baseName,))

            for f in availFiles:
                self.currentJob.addFile(f)
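A minimal self-contained sketch of the trigger check described in the docstring above; it only reproduces the gating arithmetic (release everything once trigger_time has passed), not the WMBS splitter machinery:

# Sketch of the FixedDelay gate: nothing is released before trigger_time,
# everything available is released in a single job afterwards.
import time

def should_release(trigger_time, available_count):
    return int(trigger_time) < time.time() and available_count > 0

print(should_release(time.time() - 60, 5))    # True: trigger already passed
print(should_release(time.time() + 3600, 5))  # False: still waiting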
Example #28
File: Repack_t.py Project: dmwm/T0
    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size = 1000, events = nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
Example #29
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """
        baseName = makeUUID()

        testWorkflow = Workflow(spec="spec.xml", owner="dmwm",
                                name="testWorkflow_%s" % baseName[:4], task="Test")
        testWorkflow.create()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'T1_US_FNAL_Disk')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'T2_CH_CERN')
                testFileset.addFile(newFile)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
Example #30
File: Repack_t.py Project: dmwm/T0
    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxLatency'] = 50000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return
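The 6-file and 2-file jobs asserted above follow directly from the event threshold; here is a back-of-the-envelope sketch of the cut point (not the actual Repack splitter code), using the 8 files of 100 events and the maxInputEvents = 650 value from the test:

# Sketch of the expected split in test03: add files while staying within
# maxInputEvents, start a new job when the next file would exceed it.
filesPerLumi = 8
eventsPerFile = 100
maxInputEvents = 650

jobs, current, events = [], 0, 0
for _ in range(filesPerLumi):
    if events + eventsPerFile > maxInputEvents:
        jobs.append(current)
        current, events = 0, 0
    current += 1
    events += eventsPerFile
jobs.append(current)

print(jobs)   # [6, 2] -> matches the assertions above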
Example #31
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", pnn="T2_CH_CERN")
        locationAction.execute(siteName="site2", pnn="T1_US_FNAL_Disk")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["T2_CH_CERN"]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["T2_CH_CERN"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for i in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("T2_CH_CERN")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for i in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["T2_CH_CERN", "T1_US_FNAL_Disk"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.multipleFileSubscription.create()
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.singleFileSubscription.create()
        self.multipleSiteSubscription = Subscription(
            fileset=self.multipleSiteFileset,
            workflow=testWorkflow,
            split_algo="SizeBased",
            type="Processing")
        self.multipleSiteSubscription.create()
        return
Example #32
    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #33
    def testChunking(self):
        """
        _testChunking_

        Insert a workload and files that have several distinct sets of
        locations.  Verify that the chunks are created correctly and that they
        only group files that have the same set of locations.  Also verify that
        the chunks are pulled out of ACDC correctly.
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl, database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileB.addRun(Run(1, 3, 4))
        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileC.addRun(Run(1, 5, 6))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        testJobA.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.setLocation(["cmssrm.fnal.gov"])
        testFileD.addRun(Run(2, 1, 2))
        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.setLocation(["cmssrm.fnal.gov"])
        testFileE.addRun(Run(2, 3, 4))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileD)
        testJobB.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/F"})
        testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileF.addRun(Run(3, 1, 2))
        testFileG = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/G"})
        testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileG.addRun(Run(3, 3, 4))
        testFileH = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/H"})
        testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileH.addRun(Run(3, 5, 6))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileF)
        testJobC.addFile(testFileG)
        testJobC.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileI.addRun(Run(4, 1, 2))
        testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileJ.addRun(Run(4, 3, 4))
        testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileK.addRun(Run(4, 5, 6))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileI)
        testJobD.addFile(testFileJ)
        testJobD.addFile(testFileK)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
        chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)

        self.assertEqual(len(chunks), 4, "Error: There should be four chunks: %s" % len(chunks))

        goldenMetaData = {1: {"lumis": 2, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 1024},
                          2: {"lumis": 4, "locations": ["cmssrm.fnal.gov"], "events": 2048},
                          3: {"lumis": 6, "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                              "events": 3072},
                          5: {"lumis": 10, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 5120}}

        testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
        lastFile = testFileA
        for testFile in testFiles:
            if lastFile["lfn"] < testFile["lfn"]:
                lastFile = testFile

        testFiles.remove(lastFile)

        goldenFiles = {1: [lastFile],
                       2: [testFileD, testFileE],
                       3: [testFileF, testFileG, testFileH],
                       5: testFiles}

        for chunk in chunks:
            chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                             chunk["offset"], chunk["files"])

            self.assertEqual(chunkMetaData["files"], chunk["files"])
            self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
            self.assertEqual(chunkMetaData["events"], chunk["events"])
            self.assertEqual(chunkMetaData["locations"], chunk["locations"])

            self.assertTrue(chunk["files"] in goldenMetaData.keys(), "Error: Extra chunk found.")
            self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                             "Error: Lumis in chunk is wrong.")
            self.assertEqual(chunk["locations"], goldenMetaData[chunk["files"]]["locations"],
                             "Error: Locations in chunk is wrong.")
            self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                             "Error: Events in chunk is wrong.")
            del goldenMetaData[chunk["files"]]

            chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco", chunk["offset"], chunk["files"])

            self.assertTrue(chunk["files"] in goldenFiles.keys(), "Error: Extra chunk found.")
            goldenChunkFiles = goldenFiles[chunk["files"]]
            self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

            for chunkFile in chunkFiles:
                foundFile = None
                for goldenChunkFile in goldenChunkFiles:
                    if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                        foundFile = goldenChunkFile
                        break

                self.assertIsNotNone(foundFile, "Error: Missing chunk file: %s, %s" % (chunkFiles, goldenChunkFiles))
                self.assertEqual(foundFile["parents"], chunkFile["parents"], "Error: File parents should match.")
                self.assertEqual(foundFile["merged"], chunkFile["merged"], "Error: File merged status should match.")
                self.assertEqual(foundFile["locations"], chunkFile["locations"], "Error: File locations should match.")
                self.assertEqual(foundFile["events"], chunkFile["events"])
                self.assertEqual(foundFile["size"], chunkFile["size"])
                self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]), "Error: Wrong number of runs.")
                for run in foundFile["runs"]:
                    runMatch = False
                    for chunkRun in chunkFile["runs"]:
                        if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                            runMatch = True
                            break

                    self.assertTrue(runMatch, "Error: Run information is wrong.")

            del goldenFiles[chunk["files"]]

        singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(singleChunk, {"offset": 0,
                                       "files": 11,
                                       "events": 11264,
                                       "lumis": 22,
                                       "locations": {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}},
                         "Error: Single chunk metadata is wrong")

        return
Example #34
    def createFilesWithChildren(self, moreParentFiles, acqEra):
        """
        _createFilesWithChildren_

        Create several parentless files and then create child files.
        """
        parentFiles = []
        childFiles = []

        baseLFN = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % acqEra
        for i in range(10):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root", size=1024,
                                     events=20, checksums={"cksum": 1})
            testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                  appFam="RAW", psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RAW" % acqEra)

            testFile['block_close_max_wait_time'] = 1000000
            testFile['block_close_max_events'] = 1000000
            testFile['block_close_max_size'] = 1000000
            testFile['block_close_max_files'] = 1000000

            lumis = []
            for j in range(10):
                lumis.append((i * 10) + j)
            testFile.addRun(Run(143316, *lumis))

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            parentFiles.append(testFile)

        baseLFN = "/store/data/%s/Cosmics/RECO/v1/000/143/316/" % acqEra
        for i in range(5):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root", size=1024,
                                     events=20, checksums={"cksum": 1})
            testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                  appFam="RECO", psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RECO" % acqEra)

            testFile['block_close_max_wait_time'] = 1000000
            testFile['block_close_max_events'] = 1000000
            testFile['block_close_max_size'] = 1000000
            testFile['block_close_max_files'] = 1000000

            lumis = []
            for j in range(20):
                lumis.append((i * 20) + j)
            testFile.addRun(Run(143316, *lumis))

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            testFile.addParents([parentFiles[i * 2]["lfn"],
                                 parentFiles[i * 2 + 1]["lfn"]])
            testFile.addParents([moreParentFiles[i * 2]["lfn"],
                                 moreParentFiles[i * 2 + 1]["lfn"]])
            childFiles.append(testFile)

        return parentFiles, childFiles
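The child creation above pairs each RECO file with two RAW parents by index: child i gets parents 2*i and 2*i + 1 (plus the matching pair from moreParentFiles). A small sketch of just that pairing, using hypothetical LFN strings rather than DBSBufferFile objects:

parents = ["parent-%d.root" % i for i in range(10)]
childToParents = {"child-%d.root" % i: [parents[i * 2], parents[i * 2 + 1]]
                  for i in range(5)}
print(childToParents["child-0.root"])  # ['parent-0.root', 'parent-1.root']
print(childToParents["child-4.root"])  # ['parent-8.root', 'parent-9.root']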
Example #35
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec=makeUUID(),
                                owner="Steve",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow,
                                        type="Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJobA = Job(name=makeUUID(), files=[])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group=testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name=makeUUID(), files=[])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group=testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name=makeUUID(), files=[])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group=testJobGroup)
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        changeStateAction.execute(jobs=[testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(
            classname="Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        self.assertEqual(len(runningJobs), 2,
                         "Error: Wrong number of running jobs returned.")

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                self.assertEqual(runningJob["state"], testJobA["state"],
                                 "Error: Running job has wrong state.")
                self.assertEqual(runningJob["couch_record"], testJobA["couch_record"],
                                 "Error: Running job has wrong couch record.")
            else:
                self.assertEqual(runningJob["job_name"], testJobC["name"],
                                 "Error: Running job has wrong name.")
                self.assertEqual(runningJob["state"], testJobC["state"],
                                 "Error: Running job has wrong state.")
                self.assertEqual(runningJob["couch_record"], testJobC["couch_record"],
                                 "Error: Running job has wrong couch record.")

        return
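The test expects the executing and new jobs back, but not the completed one. A hedged sketch of the kind of filter the ListRunningJobs DAO is assumed to apply; the exact set of excluded states below is an assumption, not taken from the DAO's SQL:

# Assumed set of states that no longer count as "running"; the real DAO may differ.
FINISHED_STATES = {"complete", "success", "cleanout", "exhausted", "killed"}

def listRunningJobs(jobs):
    """Keep only the jobs whose state is not a finished state."""
    return [job for job in jobs if job["state"] not in FINISHED_STATES]

jobs = [{"job_name": "A", "state": "executing"},
        {"job_name": "B", "state": "complete"},
        {"job_name": "C", "state": "new"}]
print([job["job_name"] for job in listRunningJobs(jobs)])  # ['A', 'C']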
Example #36
    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return
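With eight 100-event files in one lumi and maxInputEvents = 650, the splitter is expected to produce a 6-file job and a 2-file job. A minimal sketch of an event-threshold split that reproduces those numbers; this is a simplification of the real repack splitter, which also honours size and file-count limits:

def splitByEvents(fileEvents, maxInputEvents):
    """Pack files into jobs, starting a new job when the event threshold would be exceeded."""
    jobs, current, total = [], [], 0
    for events in fileEvents:
        if current and total + events > maxInputEvents:
            jobs.append(current)
            current, total = [], 0
        current.append(events)
        total += events
    if current:
        jobs.append(current)
    return jobs

print([len(job) for job in splitByEvents([100] * 8, 650)])  # [6, 2]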
Example #37
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Run the discovery query and generate jobs if we find enough files.
        """
        # This doesn't use a proxy
        self.grabByProxy = False

        filesPerJob = int(kwargs.get("files_per_job", 10))

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "WMCore.WMBS",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        fileAvail = daoFactory(classname = "Subscriptions.SiblingSubscriptionsComplete")
        completeFiles = fileAvail.execute(self.subscription["id"],
                                          conn = myThread.transaction.conn,
                                          transaction = True)

        self.subscription["fileset"].load()
        if self.subscription["fileset"].open == True:
            filesetClosed = False
        else:
            fileFailed = daoFactory(classname = "Subscriptions.SiblingSubscriptionsFailed")
            fileFailed.execute(self.subscription["id"],
                               self.subscription["fileset"].id,
                               conn = myThread.transaction.conn,
                               transaction = True)
            filesetClosed = True

        fileSites = {}
        foundFiles = []
        for completeFile in completeFiles:
            if completeFile["lfn"] not in foundFiles:
                foundFiles.append(completeFile["lfn"])
            else:
                continue

            if completeFile["pnn"] not in fileSites:
                fileSites[completeFile["pnn"]] = []

            fileSites[completeFile["pnn"]].append(completeFile)

        for siteName in fileSites:
            if len(fileSites[siteName]) < filesPerJob and not filesetClosed:
                continue

            self.newGroup()
            while len(fileSites[siteName]) >= filesPerJob:
                self.newJob(name = makeUUID())
                for jobFile in fileSites[siteName][0:filesPerJob]:
                    newFile = File(id = jobFile["id"], lfn = jobFile["lfn"],
                                   events = jobFile["events"])
                    newFile["locations"] = set([jobFile["pnn"]])
                    self.currentJob.addFile(newFile)

                fileSites[siteName] = fileSites[siteName][filesPerJob:]

            if filesetClosed and len(fileSites[siteName]) > 0:
                self.newJob(name = makeUUID())
                for jobFile in fileSites[siteName]:
                    newFile = File(id = jobFile["id"], lfn = jobFile["lfn"],
                                   events = jobFile["events"])
                    newFile["locations"] = set([jobFile["pnn"]])
                    self.currentJob.addFile(newFile)

        return
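The per-site loop above slices each site's file list into jobs of files_per_job files and, once the fileset is closed, flushes whatever remains as one final smaller job. The same grouping in isolation, as a plain-Python sketch:

def chunkFilesForSite(siteFiles, filesPerJob, filesetClosed):
    """Group a site's files into jobs of filesPerJob, flushing leftovers only when closed."""
    jobs = []
    while len(siteFiles) >= filesPerJob:
        jobs.append(siteFiles[:filesPerJob])
        siteFiles = siteFiles[filesPerJob:]
    if filesetClosed and siteFiles:
        jobs.append(siteFiles)
    return jobs

print(chunkFilesForSite(list(range(7)), 3, filesetClosed=True))   # [[0, 1, 2], [3, 4, 5], [6]]
print(chunkFilesForSite(list(range(7)), 3, filesetClosed=False))  # [[0, 1, 2], [3, 4, 5]]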
Example #38
    def populateWMBS(self):
        """
        _populateWMBS_

        Create files and subscriptions in WMBS
        """
        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName='s1', pnn="T1_US_FNAL_Disk")
        locationAction.execute(siteName='s2', pnn="T2_CH_CERN")
        self.validLocations = ["T1_US_FNAL_Disk", "T2_CH_CERN"]

        testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                name="wf001", task="Test")
        testWorkflow.create()

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        parentFile = File('/parent/lfn/', size=1000, events=100,
                          locations=set(["T1_US_FNAL_Disk"]))
        parentFile.create()
        for _ in range(10):
            newFile = File(makeUUID(), size=1000, events=100,
                           locations=set(["T1_US_FNAL_Disk"]))
            newFile.create()
            newFile.addParent(lfn=parentFile['lfn'])
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()
        self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        self.multipleFileSubscription.create()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name", size=1000, events=100,
                       locations=set(["T1_US_FNAL_Disk"]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()
        self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                                   workflow=testWorkflow,
                                                   split_algo="EventBased",
                                                   type="Processing")
        self.singleFileSubscription.create()

        self.multipleSiteFileset = Fileset(name="TestFileset3")
        self.multipleSiteFileset.create()
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation("T1_US_FNAL_Disk")
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        for _ in range(5):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation(["T1_US_FNAL_Disk", "T2_CH_CERN"])
            newFile.create()
            self.multipleSiteFileset.addFile(newFile)
        self.multipleSiteFileset.commit()
        self.multipleSiteSubscription = Subscription(fileset=self.multipleSiteFileset,
                                                     workflow=testWorkflow,
                                                     split_algo="EventBased",
                                                     type="Processing")
        self.multipleSiteSubscription.create()

        return
Example #39
    def testAutoIncrementCheck(self):
        """
        _AutoIncrementCheck_

        Test and see whether we can find and set the auto_increment values
        """
        myThread = threading.currentThread()
        if myThread.dialect.lower() != 'mysql':
            return

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 1)

        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 2)

        incrementDAO.execute(input=10)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 11)

        incrementDAO.execute(input=5)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 12)

        return
Example #40
    def createTestJobGroup(self,
                           name="TestWorkthrough",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           nJobs=10):
        """
        _createTestJobGroup_

        Generate a test WMBS JobGroup with real FWJRs
        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation,
                                owner="Simon",
                                name=name,
                                task=task)
        testWorkflow.create()

        testWMBSFileset = Fileset(name=name)
        testWMBSFileset.create()

        testFileA = File(lfn=makeUUID(), size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn=makeUUID(), size=1024, events=10)
        testFileB.addRun(Run(10, *[12312]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        report = Report()
        if error:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "badBackfillJobReport.pkl")
        else:
            path = os.path.join(WMCore.WMBase.getTestBase(),
                                "WMComponent_t/JobAccountant_t/fwjrs",
                                "PerformanceReport2.pkl")
        report.load(filename=path)

        self.changeState.propagate(testJobGroup.jobs, 'created', 'new')
        self.changeState.propagate(testJobGroup.jobs, 'executing', 'created')
        self.changeState.propagate(testJobGroup.jobs, 'complete', 'executing')
        for job in testJobGroup.jobs:
            job['fwjr'] = report
        self.changeState.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        self.changeState.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        self.changeState.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        self.changeState.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
Example #41
    def createJobs(self):
        """
        _createJobs_

        Create test jobs in WMBS and BossAir
        """
        testWorkflow = Workflow(spec=makeUUID(),
                                owner="tapas",
                                name=makeUUID(),
                                task="Test")
        testWorkflow.create()

        testFilesetA = Fileset(name="TestFilesetA")
        testFilesetA.create()
        testFilesetB = Fileset(name="TestFilesetB")
        testFilesetB.create()
        testFilesetC = Fileset(name="TestFilesetC")
        testFilesetC.create()

        testFileA = File(lfn="testFileA",
                         locations=set(["testSE1", "testSE2"]))
        testFileA.create()
        testFilesetA.addFile(testFileA)
        testFilesetA.commit()
        testFilesetB.addFile(testFileA)
        testFilesetB.commit()
        testFilesetC.addFile(testFileA)
        testFilesetC.commit()

        testSubscriptionA = Subscription(fileset=testFilesetA,
                                         workflow=testWorkflow,
                                         type="Processing")
        testSubscriptionA.create()
        testSubscriptionA.addWhiteBlackList([{
            "site_name": "testSite1",
            "valid": True
        }])
        testSubscriptionB = Subscription(fileset=testFilesetB,
                                         workflow=testWorkflow,
                                         type="Processing")
        testSubscriptionB.create()
        testSubscriptionB.addWhiteBlackList([{
            "site_name": "testSite1",
            "valid": False
        }])
        testSubscriptionC = Subscription(fileset=testFilesetC,
                                         workflow=testWorkflow,
                                         type="Merge")
        testSubscriptionC.create()

        testJobGroupA = JobGroup(subscription=testSubscriptionA)
        testJobGroupA.create()
        testJobGroupB = JobGroup(subscription=testSubscriptionB)
        testJobGroupB.create()
        testJobGroupC = JobGroup(subscription=testSubscriptionC)
        testJobGroupC.create()

        # Site1, Has been assigned a location and is complete.
        testJobA = Job(name="testJobA", files=[testFileA])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group=testJobGroupA)
        testJobA["state"] = "success"

        # Site 1, Has been assigned a location and is incomplete.
        testJobB = Job(name="testJobB", files=[testFileA])
        testJobB["couch_record"] = makeUUID()
        testJobB["cache_dir"] = self.tempDir
        testJobB.create(group=testJobGroupA)
        testJobB["state"] = "executing"
        runJobB = RunJob()
        runJobB.buildFromJob(testJobB)
        runJobB["status"] = "PEND"

        # Does not have a location, white listed to site 1
        testJobC = Job(name="testJobC", files=[testFileA])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group=testJobGroupA)
        testJobC["state"] = "new"

        # Site 2, Has been assigned a location and is complete.
        testJobD = Job(name="testJobD", files=[testFileA])
        testJobD["couch_record"] = makeUUID()
        testJobD.create(group=testJobGroupB)
        testJobD["state"] = "success"

        # Site 2, Has been assigned a location and is incomplete.
        testJobE = Job(name="testJobE", files=[testFileA])
        testJobE["couch_record"] = makeUUID()
        testJobE.create(group=testJobGroupB)
        testJobE["state"] = "executing"
        runJobE = RunJob()
        runJobE.buildFromJob(testJobE)
        runJobE["status"] = "RUN"

        # Does not have a location, site 1 is blacklisted.
        testJobF = Job(name="testJobF", files=[testFileA])
        testJobF["couch_record"] = makeUUID()
        testJobF.create(group=testJobGroupB)
        testJobF["state"] = "new"

        # Site 3, Has been assigned a location and is complete.
        testJobG = Job(name="testJobG", files=[testFileA])
        testJobG["couch_record"] = makeUUID()
        testJobG.create(group=testJobGroupC)
        testJobG["state"] = "cleanout"

        # Site 3, Has been assigned a location and is incomplete.
        testJobH = Job(name="testJobH", files=[testFileA])
        testJobH["couch_record"] = makeUUID()
        testJobH.create(group=testJobGroupC)
        testJobH["state"] = "new"

        # Site 3, Does not have a location.
        testJobI = Job(name="testJobI", files=[testFileA])
        testJobI["couch_record"] = makeUUID()
        testJobI.create(group=testJobGroupC)
        testJobI["state"] = "new"

        # Site 3, Does not have a location and is in cleanout.
        testJobJ = Job(name="testJobJ", files=[testFileA])
        testJobJ["couch_record"] = makeUUID()
        testJobJ.create(group=testJobGroupC)
        testJobJ["state"] = "cleanout"

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        changeStateAction.execute(jobs=[
            testJobA, testJobB, testJobC, testJobD, testJobE, testJobF,
            testJobG, testJobH, testJobI, testJobJ
        ])

        self.insertRunJob.execute([runJobB, runJobE])

        setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
        setLocationAction.execute(testJobA["id"], "testSite1")
        setLocationAction.execute(testJobB["id"], "testSite1")
        setLocationAction.execute(testJobD["id"], "testSite1")
        setLocationAction.execute(testJobE["id"], "testSite2")
        setLocationAction.execute(testJobG["id"], "testSite1")
        setLocationAction.execute(testJobH["id"], "testSite1")

        return
Example #42
    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size=1000, events=nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
Example #43
    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)

        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeMultiLumi'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
Example #44
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return
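The mask built above keeps only lumis 101-102 of run 100 and drops run 300 entirely. A standalone sketch of that filtering, with the mask modelled as a plain dict of run number to inclusive [start, end] lumi ranges; this is an approximation of what Mask.filterRunLumisByMask does, not the real implementation:

def filterRunLumisByMask(mask, runs):
    """Keep only the lumis of each run that fall inside the mask's inclusive ranges."""
    filtered = []
    for runNumber, lumis in runs:
        ranges = mask.get(runNumber, [])
        kept = [lumi for lumi in lumis
                if any(start <= lumi <= end for start, end in ranges)]
        if kept:
            filtered.append((runNumber, kept))
    return filtered

mask = {100: [[101, 102]], 200: [[201, 202]]}
print(filterRunLumisByMask(mask, [(100, [101, 102, 103, 104]), (300, [1001, 1002])]))
# [(100, [101, 102])]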
Example #45
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={
            'RUN': 1,
            'LUMI': 3,
            'STREAM': "A",
            'FILECOUNT': 0,
            'INSERT_TIME': self.currentTime,
            'CLOSE_TIME': self.currentTime
        },
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        return
Example #46
    def testDualUpload(self):
        """
        _testDualUpload_

        Verify that the dual upload mode works correctly.
        """
        self.dbsApi = DbsApi(url=self.dbsUrl)
        config = self.getConfig()
        dbsUploader = DBSUploadPoller(config=config)
        dbsUtil = DBSBufferUtil()

        # First test verifies that uploader will poll and then not do anything
        # as the database is empty.
        dbsUploader.algorithm()

        acqEra = "Summer%s" % (int(time.time()))
        parentFiles = self.createParentFiles(acqEra)
        (moreParentFiles, childFiles) = \
            self.createFilesWithChildren(parentFiles, acqEra)

        allFiles = parentFiles + moreParentFiles
        allBlocks = []
        for i in range(4):
            DBSBufferDataset(parentFiles[0]["datasetPath"]).create()
            blockName = parentFiles[0]["datasetPath"] + "#" + makeUUID()
            dbsBlock = DBSBufferBlock(blockName,
                                      location="malpaquet",
                                      datasetpath=None)
            dbsBlock.status = "Open"
            dbsBlock.setDataset(parentFiles[0]["datasetPath"], 'data', 'VALID')
            dbsUtil.createBlocks([dbsBlock])
            for fileObj in allFiles[i * 5: (i * 5) + 5]:
                dbsBlock.addFile(fileObj, 'data', 'VALID')
                dbsUtil.setBlockFiles({"block": blockName, "filelfn": fileObj["lfn"]})
                if i < 2:
                    dbsBlock.status = "InDBS"
                dbsUtil.updateBlocks([dbsBlock])
            dbsUtil.updateFileStatus([dbsBlock], "InDBS")
            allBlocks.append(dbsBlock)

        DBSBufferDataset(childFiles[0]["datasetPath"]).create()
        blockName = childFiles[0]["datasetPath"] + "#" + makeUUID()
        dbsBlock = DBSBufferBlock(blockName,
                                  location="malpaquet",
                                  datasetpath=None)
        dbsBlock.status = "InDBS"
        dbsBlock.setDataset(childFiles[0]["datasetPath"], 'data', 'VALID')
        dbsUtil.createBlocks([dbsBlock])
        for fileObj in childFiles:
            dbsBlock.addFile(fileObj, 'data', 'VALID')
            dbsUtil.setBlockFiles({"block": blockName, "filelfn": fileObj["lfn"]})

        dbsUtil.updateFileStatus([dbsBlock], "InDBS")

        dbsUploader.algorithm()
        time.sleep(5)
        dbsUploader.algorithm()
        time.sleep(5)

        self.verifyData(parentFiles[0]["datasetPath"], parentFiles)

        # Change the status of the rest of the parent blocks so we can upload
        # them and the children.
        for dbsBlock in allBlocks:
            dbsBlock.status = "InDBS"
            dbsUtil.updateBlocks([dbsBlock])

        dbsUploader.algorithm()
        time.sleep(5)

        self.verifyData(parentFiles[0]["datasetPath"], parentFiles + moreParentFiles)

        # Run the uploader one more time to upload the children.
        dbsUploader.algorithm()
        time.sleep(5)

        self.verifyData(childFiles[0]["datasetPath"], childFiles)
        return
Example #47
    def setUp(self):
        """
        _setUp_

        Create four subscriptions over different filesets: one with multiple
        files, one with a single file, one with files spread across multiple
        runs and one whose files all share a single run.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45 + i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileRunset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(i // 3, *[45]))
            self.multipleFileRunset.addFile(newFile)

        self.singleRunFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45]))
            self.singleRunFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.multipleRunSubscription = Subscription(
            fileset=self.multipleFileRunset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")
        self.singleRunSubscription = Subscription(
            fileset=self.singleRunFileset,
            workflow=testWorkflow,
            split_algo="RunBased",
            type="Processing")

        return
Example #48
    def __call__(self, wmbsJob):
        wmbsJob['id'] = "%s/%s" % (self.task.getPathName(), makeUUID())
        wmbsJob['name'] = "%s/%s" % (self.task.getPathName(), makeUUID())
Example #49
    def testC_ACDCTest(self):
        """
        _ACDCTest_

        Test whether we can get a goodRunList out of ACDC
        and process it correctly.
        """
        workload = self.createTestWorkload()
        dcs = DataCollectionService(url=self.testInit.couchUrl,
                                    database=self.testInit.couchDbName)

        testFileA = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileA.addRun(Run(1, 1, 2))
        testFileA.create()
        testFileB = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileB.addRun(Run(1, 3))
        testFileB.create()
        testJobA = getJob(workload)
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileC.addRun(Run(1, 4, 6))
        testFileC.create()
        testJobB = getJob(workload)
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileD.addRun(Run(1, 7))
        testFileD.create()
        testJobC = getJob(workload)
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileE.addRun(Run(1, 11, 12))
        testFileE.create()
        testJobD = getJob(workload)
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileF.addRun(Run(2, 5, 6, 7))
        testFileF.create()
        testJobE = getJob(workload)
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileG.addRun(Run(2, 10, 11, 12))
        testFileG.create()
        testJobF = getJob(workload)
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileH.addRun(Run(2, 15))
        testFileH.create()
        testJobG = getJob(workload)
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileI.addRun(Run(3, 20))
        testFileI.create()
        testJobH = getJob(workload)
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileJ.addRun(Run(1, 9))
        testFileJ.create()
        testJobI = getJob(workload)
        testJobI.addFile(testFileJ)

        # dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
        #                testJobF, testJobG, testJobH, testJobI])

        dcs.failedJobs([testJobA, testJobD, testJobH])

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.addFile(testFileH)
        testFileset.addFile(testFileI)
        testFileset.addFile(testFileJ)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(
            lumis_per_job=100,
            halt_job_on_file_boundaries=False,
            splitOnRun=True,
            collectionName=workload.name(),
            filesetName=workload.getTask("reco").getPathName(),
            owner="evansde77",
            group="DMWM",
            couchURL=self.testInit.couchUrl,
            couchDB=self.testInit.couchDbName,
            performance=self.performanceParams)

        self.assertEqual(jobGroups[0].jobs[0]['mask'].getRunAndLumis(),
                         {1: [[1, 2], [3, 3], [11, 12]]})
        self.assertEqual(jobGroups[0].jobs[1]['mask'].getRunAndLumis(),
                         {3: [[20, 20]]})

        return
Example #50
    def createTestJobGroup(self, nJobs=10, retry_count=1,
                           workloadPath='test', fwjrPath=None,
                           workloadName=makeUUID(),
                           fileModifier=''):
        """
        Creates a group of several jobs
        """


        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops",
                                name=workloadName, task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier, size=1024, events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('T2_CH_CERN')

        testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10,
                         first_event=88)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('T2_CH_CERN')

        testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10,
                         first_event=88)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('T2_CH_CERN')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier)
        testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier)

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup
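One detail in the signature above: workloadName=makeUUID() is evaluated once, when the method is defined, so every call that omits the argument reuses the same UUID. If a fresh name per call were wanted, the usual pattern is to default to None and generate inside the body; a sketch using the standard uuid module as a stand-in for makeUUID:

import uuid

def makeWorkloadName(workloadName=None):
    """Generate the default lazily so each call gets its own UUID."""
    if workloadName is None:
        workloadName = uuid.uuid4().hex
    return workloadName

print(makeWorkloadName() != makeWorkloadName())  # True: two calls, two different defaults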
Example #51
    def testCompleteJobInput(self):
        """
        _testCompleteJobInput_

        Verify the correct output of the CompleteInput DAO.  This should mark
        the input for a job as complete once all the jobs that run over a
        particular file have completed successfully.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileA.create()
        testFileB.create()
        testFileC.create()

        testFileset.addFile([testFileA, testFileB, testFileC])
        bogusFileset.addFile([testFileA, testFileB, testFileC])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB, testFileC])
        bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA", files=[testFileA])
        testJobB = Job(name="TestJobB", files=[testFileA, testFileB])
        testJobC = Job(name="TestJobC", files=[testFileC])
        bogusJob = Job(name="BogusJob",
                       files=[testFileA, testFileB, testFileC])
        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)
        testJobC.create(group=testJobGroup)
        bogusJob.create(group=bogusJobGroup)

        testJobA["outcome"] = "success"
        testJobB["outcome"] = "failure"
        testJobC["outcome"] = "success"
        testJobA.save()
        testJobB.save()
        testJobC.save()

        testJobA.completeInputFiles()

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        self.assertEqual(compFiles, 0,
                         "Error: test sub has wrong number of complete files: %s" % compFiles)

        testJobB["outcome"] = "success"
        testJobB.save()

        testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]])

        availFiles = len(testSubscription.filesOfStatus("Available"))
        self.assertEqual(availFiles, 0,
                         "Error: test sub has wrong number of available files: %s" % availFiles)

        acqFiles = len(testSubscription.filesOfStatus("Acquired"))
        self.assertEqual(acqFiles, 1,
                         "Error: test sub has wrong number of acquired files: %s" % acqFiles)

        compFiles = len(testSubscription.filesOfStatus("Completed"))
        self.assertEqual(compFiles, 1,
                         "Error: test sub has wrong number of complete files: %s" % compFiles)

        failFiles = len(testSubscription.filesOfStatus("Failed"))
        self.assertEqual(failFiles, 1,
                         "Error: test sub has wrong number of failed files: %s" % failFiles)

        availFiles = len(bogusSubscription.filesOfStatus("Available"))
        self.assertEqual(availFiles, 0,
                         "Error: bogus sub has wrong number of available files: %s" % availFiles)

        acqFiles = len(bogusSubscription.filesOfStatus("Acquired"))
        self.assertEqual(acqFiles, 3,
                         "Error: bogus sub has wrong number of acquired files: %s" % acqFiles)

        compFiles = len(bogusSubscription.filesOfStatus("Completed"))
        self.assertEqual(compFiles, 0,
                         "Error: bogus sub has wrong number of complete files: %s" % compFiles)

        failFiles = len(bogusSubscription.filesOfStatus("Failed"))
        self.assertEqual(failFiles, 0,
                         "Error: bogus sub has wrong number of failed files: %s" % failFiles)

        return
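Per the docstring, the DAO should mark a file's input complete only once every job that reads it has completed successfully. A hedged sketch of just that rule over plain dictionaries, ignoring the skipFiles argument and the Available/Acquired/Failed bookkeeping the test also checks:

def completedFiles(jobs):
    """Return the LFNs whose every job finished with outcome 'success'."""
    fileOutcomes = {}
    for job in jobs:
        for lfn in job["files"]:
            fileOutcomes.setdefault(lfn, []).append(job["outcome"])
    return {lfn for lfn, outcomes in fileOutcomes.items()
            if all(outcome == "success" for outcome in outcomes)}

jobs = [{"files": ["A"], "outcome": "success"},
        {"files": ["A", "B"], "outcome": "failure"},
        {"files": ["C"], "outcome": "success"}]
print(sorted(completedFiles(jobs)))  # ['C']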
Example #52
    def testGetLumiWhitelist(self):
        """
        _testGetLumiWhitelist_

        Verify that the ACDC whitelist generation code works correctly.  We'll
        add jobs with the following lumi info:
          # Run 1, lumis [1, 2, 3], [4, 6], [7], [9], [11, 12]
          # Run 2, lumis [5, 6, 7], [10, 11, 12], [15]
          # Run 3, lumis [20]

        And should get out a whitelist that looks like this:
          {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
           "2": [[5, 7], [10, 12], [15, 15]],
           "3": [[20, 20]]}
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl, database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.addRun(Run(1, 3))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.addRun(Run(1, 4, 6))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.addRun(Run(1, 7))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.addRun(Run(1, 11, 12))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024)
        testFileF.addRun(Run(2, 5, 6, 7))
        testJobE = self.getMinimalJob()
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(), size=1024, events=1024)
        testFileG.addRun(Run(2, 10, 11, 12))
        testJobF = self.getMinimalJob()
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(), size=1024, events=1024)
        testFileH.addRun(Run(2, 15))
        testJobG = self.getMinimalJob()
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024)
        testFileI.addRun(Run(3, 20))
        testJobH = self.getMinimalJob()
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(), size=1024, events=1024)
        testFileJ.addRun(Run(1, 9))
        testJobI = self.getMinimalJob()
        testJobI.addFile(testFileJ)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
                        testJobF, testJobG, testJobH, testJobI])
        whiteList = dcs.getLumiWhitelist("ACDCTest", "/ACDCTest/reco")

        self.assertEqual(len(whiteList.keys()), 3,
                         "Error: There should be 3 runs.")
        self.assertEqual(whiteList["1"], [[1, 4], [6, 7], [9, 9], [11, 12]],
                         "Error: Whitelist for run 1 is wrong.")
        self.assertEqual(whiteList["2"], [[5, 7], [10, 12], [15, 15]],
                         "Error: Whitelist for run 2 is wrong.")
        self.assertEqual(whiteList["3"], [[20, 20]],
                         "Error: Whitelist for run 3 is wrong.")

        correctLumiList = LumiList(compactList={"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
                                                "2": [[5, 7], [10, 12], [15, 15]],
                                                "3": [[20, 20]]})
        testLumiList = dcs.getLumilistWhitelist("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(correctLumiList.getCMSSWString(), testLumiList.getCMSSWString())

        return
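Combining the per-file Run objects for run 1 gives the lumi set {1, 2, 3, 4, 6, 7, 9, 11, 12}, which compacts to exactly the expected whitelist ranges. A minimal sketch of that compaction step, independent of ACDC itself:

def compactLumis(lumis):
    """Collapse a collection of lumi numbers into sorted, inclusive [first, last] ranges."""
    ranges = []
    for lumi in sorted(set(lumis)):
        if ranges and lumi == ranges[-1][1] + 1:
            ranges[-1][1] = lumi
        else:
            ranges.append([lumi, lumi])
    return ranges

print(compactLumis([1, 2, 3, 4, 6, 7, 9, 11, 12]))
# [[1, 4], [6, 7], [9, 9], [11, 12]]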
Example #53
    def bulkCommit(self, jobGroups):
        """
        _bulkCommit_

        Commits all objects created during job splitting.  This is dangerous because it assumes
        that you can pass in all jobGroups.
        """

        jobList = []
        jobGroupList = []
        nameList = []

        # You have to do things in this order:
        # 1) First create Filesets, then jobGroups
        # 2) Second, create jobs pointing to jobGroups
        # 3) Deal with masks, etc.

        # First, do we exist?  We better
        # This happens in its own transaction
        if self['id'] == -1:
            self.create()

        existingTransaction = self.beginTransaction()

        # You need to create a number of Filesets equal to the
        # number of jobGroups.

        for _ in jobGroups:
            # Make a random name for each fileset
            nameList.append(makeUUID())

        # Create filesets
        action = self.daofactory(classname="Fileset.BulkNewReturn")
        fsIDs = action.execute(nameList=nameList, open=True,
                               conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        for jobGroup in jobGroups:
            jobGroup.uid = makeUUID()
            jobGroupList.append({'subscription': self['id'],
                                 'uid': jobGroup.uid,
                                 'output': fsIDs.pop()})

        action = self.daofactory(classname="JobGroup.BulkNewReturn")
        jgIDs = action.execute(bulkInput=jobGroupList,
                               conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        for jobGroup in jobGroups:
            for idUID in jgIDs:
                # This should assign an ID to the right job
                if jobGroup.uid == idUID['guid']:
                    jobGroup.id = idUID['id']
                    break

        for jobGroup in jobGroups:
            for job in jobGroup.newjobs:
                if job["id"] is not None:
                    continue

                job["jobgroup"] = jobGroup.id

                if job["name"] is None:
                    job["name"] = makeUUID()
                jobList.append(job)

        bulkAction = self.daofactory(classname="Jobs.New")
        result = bulkAction.execute(jobList=jobList, conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        # Move jobs to jobs from newjobs
        for jobGroup in jobGroups:
            jobGroup.jobs.extend(jobGroup.newjobs)
            jobGroup.newjobs = []

        # Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        jobFileRunLumis = []
        for job in jobList:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            for f in job['input_files']:
                fileDict[job['id']].append(f['id'])
                fileMask = job['mask'].filterRunLumisByMask(runs=f['runs'])
                for runObj in fileMask:
                    run = runObj.run
                    lumis = runObj.lumis
                    for lumi in lumis:
                        jobFileRunLumis.append((job['id'], f['id'], run, lumi))

        # Create a list of mask binds
        maskList = []
        for job in jobList:
            mask = job['mask']
            if len(list(mask['runAndLumis'].keys())) > 0:
                # Then we have multiple binds
                binds = mask.produceCommitBinds(jobID=job['id'])
                maskList.extend(binds)
            else:
                mask['jobID'] = job['id']
                maskList.append(mask)

        maskAction = self.daofactory(classname="Masks.Save")
        maskAction.execute(jobid=None, mask=maskList, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        fileAction = self.daofactory(classname="Jobs.AddFiles")
        fileAction.execute(jobDict=fileDict, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        # wfid = self['workflow'].id
        # Add work units and associate them
        # wuAction = self.daofactory(classname='WorkUnit.Add')
        # wufAction = self.daofactory(classname='Jobs.AddWorkUnits')

        # Make a count of how many times each job appears in the list of jobFileRunLumis
        # jobUnitCounts = Counter([jid for jid, _, _, _ in jobFileRunLumis])

        # for jid, fid, run, lumi in jobFileRunLumis:
        #     wuAction.execute(taskid=wfid, fileid=fid, run=run, lumi=lumi, last_unit_count=jobUnitCounts[jid],
        #                      conn=self.getDBConn(), transaction=self.existingTransaction())
        # wufAction.execute(jobFileRunLumis=jobFileRunLumis,
        #                   conn=self.getDBConn(), transaction=self.existingTransaction())

        fileList = []
        for job in jobList:
            fileList.extend(job['input_files'])

        self.acquireFiles(files=fileList)
        self.commitTransaction(existingTransaction)
        return
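# A minimal usage sketch for the bulk commit above (hedged: this assumes the
# method is the WMBS Subscription.bulkCommit API; the helper name, its
# arguments and the chunking are purely illustrative, and the database/file
# setup is elided).
from WMCore.WMBS.Job import Job
from WMCore.WMBS.JobGroup import JobGroup

def commitSplitJobs(subscription, fileChunks):
    """Build one JobGroup per chunk of input files and commit everything at once."""
    jobGroups = []
    for chunk in fileChunks:                     # each chunk is a list of WMBS Files
        group = JobGroup(subscription=subscription)
        group.add(Job(files=chunk))              # stays in group.newjobs until committed
        jobGroups.append(group)
    # A single call then creates the filesets, job groups, jobs and masks in order.
    subscription.bulkCommit(jobGroups=jobGroups)
    return jobGroups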
Example #54
    def testCreatePopulateDrop(self):
        """
        _testCreatePopulateDrop_

        Test creating, populating and dropping a collection.
        """
        testCollectionA = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="Thunderstruck")
        testCollectionB = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionA.create()
        testCollectionB.create()

        # There should be nothing in couch.  Documents are only added for
        # filesets and files.

        testFilesA = []
        for i in range(5):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesA.append(testFile)
        testFilesB = []
        for i in range(10):
            testFile = File(lfn=makeUUID(),
                            size=random.randint(1024, 4096),
                            events=random.randint(1024, 4096))
            testFilesB.append(testFile)

        testFilesetA = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetA")
        testFilesetB = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetB")
        testFilesetC = CouchFileset(database=self.testInit.couchDbName,
                                    url=self.testInit.couchUrl,
                                    name="TestFilesetC")
        testCollectionA.addFileset(testFilesetA)
        testCollectionB.addFileset(testFilesetB)
        testCollectionB.addFileset(testFilesetC)
        testFilesetA.add(testFilesA)
        testFilesetB.add(testFilesA)
        testFilesetC.add(testFilesA)
        testFilesetC.add(testFilesB)

        # Drop testCollectionA
        testCollectionA.drop()

        # Try to populate the collection that was just dropped
        testCollectionC = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="ThunderStruck")
        testCollectionC.populate()

        self.assertEqual(
            len(testCollectionC["filesets"]), 0,
            "Error: There should be no filesets in this collect.")

        # Populate the second collection (StruckThunder)
        testCollectionD = CouchCollection(database=self.testInit.couchDbName,
                                          url=self.testInit.couchUrl,
                                          name="StruckThunder")
        testCollectionD.populate()

        for fileset in testCollectionD["filesets"]:
            # Copy the list so that extending it for TestFilesetC does not
            # modify testFilesA itself.
            testFiles = list(testFilesA)
            if fileset["name"] == "TestFilesetC":
                testFiles.extend(testFilesB)

            self.assertEqual(len(testFiles), len(fileset.files.keys()),
                             "Error: Wrong number of files in fileset.")
            for testFile in testFiles:
                self.assertTrue(testFile["lfn"] in fileset.files.keys(),
                                "Error: File is missing.")
                self.assertEqual(testFile["events"],
                                 fileset.files[testFile["lfn"]]["events"],
                                 "Error: Wrong number of events.")
                self.assertEqual(testFile["size"],
                                 fileset.files[testFile["lfn"]]["size"],
                                 "Error: Wrong file size.")

        return
Example #55
    def stuffDatabase(self):
        """
        _stuffDatabase_

        Fill the dbsbuffer with some files and blocks.  We'll insert a total
        of five files spanning two blocks; two datasets in total are inserted
        into the database.

        We'll inject files with the location set as an SE name as well as a
        PhEDEx node name.
        """
        myThread = threading.currentThread()

        buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
        insertWorkflow = buffer3Factory(classname="InsertWorkflow")
        insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

        checksums = {"adler32": "1234", "cksum": "5678"}
        testFileA = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileA.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath(self.testDatasetA)
        testFileA.addRun(Run(2, *[45]))
        testFileA.create()

        testFileB = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileB.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileB.setDatasetPath(self.testDatasetA)
        testFileB.addRun(Run(2, *[45]))
        testFileB.create()

        testFileC = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileC.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileC.setDatasetPath(self.testDatasetA)
        testFileC.addRun(Run(2, *[45]))
        testFileC.create()

        self.testFilesA.append(testFileA)
        self.testFilesA.append(testFileB)
        self.testFilesA.append(testFileC)

        testFileD = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileD.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileD.setDatasetPath(self.testDatasetB)
        testFileD.addRun(Run(2, *[45]))
        testFileD.create()

        testFileE = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileE.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileE.setDatasetPath(self.testDatasetB)
        testFileE.addRun(Run(2, *[45]))
        testFileE.create()

        self.testFilesB.append(testFileD)
        self.testFilesB.append(testFileE)

        uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)
        datasetAction = uploadFactory(classname="NewDataset")
        createAction = uploadFactory(classname="CreateBlocks")

        datasetAction.execute(datasetPath=self.testDatasetA)
        datasetAction.execute(datasetPath=self.testDatasetB)

        self.blockAName = self.testDatasetA + "#" + makeUUID()
        self.blockBName = self.testDatasetB + "#" + makeUUID()

        newBlockA = DBSBufferBlock(name=self.blockAName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
        newBlockA.status = 'Closed'

        newBlockB = DBSBufferBlock(name=self.blockBName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
        newBlockB.status = 'Closed'

        createAction.execute(blocks=[newBlockA, newBlockB])

        bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)

        setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
        setBlock.execute(testFileA["lfn"], self.blockAName)
        setBlock.execute(testFileB["lfn"], self.blockAName)
        setBlock.execute(testFileC["lfn"], self.blockAName)
        setBlock.execute(testFileD["lfn"], self.blockBName)
        setBlock.execute(testFileE["lfn"], self.blockBName)

        fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
        fileStatus.execute(testFileA["lfn"], "LOCAL")
        fileStatus.execute(testFileB["lfn"], "LOCAL")
        fileStatus.execute(testFileC["lfn"], "LOCAL")
        fileStatus.execute(testFileD["lfn"], "LOCAL")
        fileStatus.execute(testFileE["lfn"], "LOCAL")

        associateWorkflow = buffer3Factory(
            classname="DBSBufferFiles.AssociateWorkflowToFile")
        associateWorkflow.execute(testFileA["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileB["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileC["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileD["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileE["lfn"], "BogusRequest",
                                  "BogusTask")

        return
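# The five DBSBufferFile insertions above repeat the same construction; a
# hypothetical helper (not part of the original test, reusing DBSBufferFile,
# Run and makeUUID as imported by this module) could collapse the repetition:
def makeBufferFile(datasetPath, checksums, location="srm-cms.cern.ch"):
    """Create and register one DBSBufferFile with the standard test settings."""
    bufferFile = DBSBufferFile(lfn=makeUUID(), size=1024, events=10,
                               checksums=checksums,
                               locations=set([location]))
    bufferFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_2_1_8",
                            appFam="RECO", psetHash="GIBBERISH",
                            configContent="MOREGIBBERISH")
    bufferFile.setDatasetPath(datasetPath)
    bufferFile.addRun(Run(2, *[45]))
    bufferFile.create()
    return bufferFile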
Example #56
    def testVerySimpleTest(self):
        """
        _testVerySimpleTest_

        Just test that everything works...more or less
        """

        # return

        myThread = threading.currentThread()

        config = self.getConfig()

        name = makeUUID()
        nSubs = 5
        nFiles = 10
        workloadName = 'TestWorkload'

        dummyWorkload = self.createWorkload(workloadName=workloadName)
        workloadPath = os.path.join(self.testDir, 'workloadTest',
                                    'TestWorkload', 'WMSandbox',
                                    'WMWorkload.pkl')

        self.createJobCollection(name=name,
                                 nSubs=nSubs,
                                 nFiles=nFiles,
                                 workflowURL=workloadPath)

        testJobCreator = JobCreatorPoller(config=config)

        # First, can we run once without everything crashing?
        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname="Jobs.GetAllJobs")
        result = getJobsAction.execute(state='Created', jobType="Processing")

        self.assertEqual(len(result), nSubs * nFiles)

        # Count database objects
        result = myThread.dbi.processData(
            'SELECT * FROM wmbs_sub_files_acquired')[0].fetchall()
        self.assertEqual(len(result), nSubs * nFiles)

        # Find the test directory
        testDirectory = os.path.join(self.testDir, 'jobCacheDir',
                                     'TestWorkload', 'ReReco')
        # It should have at least one jobGroup
        self.assertTrue('JobCollection_1_0' in os.listdir(testDirectory))
        # But no more than twenty
        self.assertTrue(len(os.listdir(testDirectory)) <= 20)

        groupDirectory = os.path.join(testDirectory, 'JobCollection_1_0')

        # First job should be in here
        listOfDirs = []
        for tmpDirectory in os.listdir(testDirectory):
            listOfDirs.extend(
                os.listdir(os.path.join(testDirectory, tmpDirectory)))
        self.assertTrue('job_1' in listOfDirs)
        self.assertTrue('job_2' in listOfDirs)
        self.assertTrue('job_3' in listOfDirs)
        jobDir = os.listdir(groupDirectory)[0]
        jobFile = os.path.join(groupDirectory, jobDir, 'job.pkl')
        self.assertTrue(os.path.isfile(jobFile))
        with open(jobFile, 'rb') as f:
            job = pickle.load(f)

        self.assertEqual(job.baggage.PresetSeeder.generator.initialSeed, 1001)
        self.assertEqual(job.baggage.PresetSeeder.evtgenproducer.initialSeed,
                         1001)

        self.assertEqual(job['workflow'], name)
        self.assertEqual(len(job['input_files']), 1)
        self.assertEqual(os.path.basename(job['sandbox']),
                         'TestWorkload-Sandbox.tar.bz2')

        return
Example #57
    def execute(self, emulator=None):
        """
        _execute_

        """
        # Are we using emulators again?
        if emulator is not None:
            return emulator.emulate(self.step, self.job)

        logging.info("Steps.Executors.%s.execute called",
                     self.__class__.__name__)

        # Search through steps for analysis files
        for step in self.stepSpace.taskSpace.stepSpaces():
            if step == self.stepName:
                # Don't try to parse your own report; it's not there yet
                continue
            stepLocation = os.path.join(self.stepSpace.taskSpace.location,
                                        step)
            logging.info("Beginning report processing for step %s", step)
            reportLocation = os.path.join(stepLocation, 'Report.pkl')
            if not os.path.isfile(reportLocation):
                logging.error("Cannot find report for step %s in space %s",
                              step, stepLocation)
                continue

            # First, get everything from a file and 'unpersist' it
            stepReport = Report()
            stepReport.unpersist(reportLocation, step)

            # Don't upload or stage out files from bad steps.
            if not stepReport.stepSuccessful(step):
                continue

            # Pulling out the analysis files from each step
            analysisFiles = stepReport.getAnalysisFilesFromStep(step)

            # make sure all conditions from this job get the same uuid
            uuid = makeUUID()

            condFiles2copy = []
            lumiFiles2copy = []

            # Working on analysis files
            for analysisFile in analysisFiles:

                # deal with sqlite files
                if analysisFile.FileClass == "ALCA":

                    sqlitefile = analysisFile.fileName.replace(
                        'sqlite_file:', '', 1)

                    filenamePrefix = "Run%d@%s@%s" % (
                        self.step.condition.runNumber, analysisFile.inputtag,
                        uuid)
                    filenameDB = filenamePrefix + ".db"
                    filenameTXT = filenamePrefix + ".txt"

                    shutil.copy2(os.path.join(stepLocation, sqlitefile),
                                 filenameDB)

                    textoutput = "prepMetaData %s\n" % analysisFile.prepMetaData
                    textoutput += "prodMetaData %s\n" % analysisFile.prodMetaData

                    with open(filenameTXT, "w") as fout:
                        fout.write(textoutput)

                    # Make both files readable and writable by owner, group and world
                    fileMode = (stat.S_IREAD | stat.S_IWRITE |
                                stat.S_IRUSR | stat.S_IWUSR |
                                stat.S_IRGRP | stat.S_IWGRP |
                                stat.S_IROTH | stat.S_IWOTH)
                    os.chmod(filenameDB, fileMode)
                    os.chmod(filenameTXT, fileMode)

                    condFiles2copy.append(filenameDB)
                    condFiles2copy.append(filenameTXT)

                # deal with text files containing lumi info
                elif analysisFile.FileClass == "ALCATXT":

                    shutil.copy2(
                        os.path.join(stepLocation, analysisFile.fileName),
                        analysisFile.fileName)
                    lumiFiles2copy.append(analysisFile.fileName)

            # copy conditions files out and fake the job report
            addedOutputFJR = False
            if self.step.condition.lfnbase:
                logging.info("Copy out conditions files to %s",
                             self.step.condition.lfnbase)
                for file2copy in condFiles2copy:

                    logging.info("==> copy %s", file2copy)

                    targetLFN = os.path.join(self.step.condition.lfnbase,
                                             file2copy)
                    targetPFN = "root://eoscms//eos/cms%s" % targetLFN

                    command = "env XRD_WRITERECOVERY=0 xrdcp -s -f %s %s" % (
                        file2copy, targetPFN)

                    p = subprocess.Popen(command,
                                         shell=True,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT)
                    output = p.communicate()[0]
                    if p.returncode > 0:
                        msg = 'Failure during condition copy to EOS:\n'
                        msg += '   %s\n' % output
                        logging.error(msg)
                        raise WMExecutionFailure(60319, "AlcaHarvestFailure",
                                                 msg)

                    # add fake output file to job report
                    addedOutputFJR = True
                    stepReport.addOutputFile(self.step.condition.outLabel,
                                             aFile={'lfn': targetLFN,
                                                    'pfn': targetPFN,
                                                    'module_label': self.step.condition.outLabel})

            # copy luminosity files out
            if self.step.luminosity.url:
                logging.info("Copy out luminosity files to %s",
                             self.step.luminosity.url)
                for file2copy in lumiFiles2copy:

                    logging.info("==> copy %s", file2copy)

                    targetPFN = rootUrlJoin(self.step.luminosity.url,
                                            file2copy)
                    if not targetPFN:
                        msg = 'No valid URL for lumi copy:\n'
                        msg += '   %s\n' % self.step.luminosity.url
                        logging.error(msg)
                        raise WMExecutionFailure(60319, "AlcaHarvestFailure",
                                                 msg)

                    command = "env XRD_WRITERECOVERY=0 xrdcp -s -f %s %s" % (
                        file2copy, targetPFN)

                    p = subprocess.Popen(command,
                                         shell=True,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT)
                    output = p.communicate()[0]
                    if p.returncode > 0:
                        msg = 'Failure during copy to EOS:\n'
                        msg += '   %s\n' % output
                        logging.error(msg)
                        raise WMExecutionFailure(60319, "AlcaHarvestFailure",
                                                 msg)

            if not addedOutputFJR:
                # No conditions output from AlcaHarvest is a valid result; it
                # can happen if the calibration algorithms produced no output,
                # for instance because of insufficient statistics.
                #
                # Add a fake placeholder output file to the job report.
                logging.info(
                    "==> no sqlite files from AlcaHarvest job, creating placeholder file record"
                )
                stepReport.addOutputFile(self.step.condition.outLabel,
                                         aFile={'lfn': "/no/output",
                                                'pfn': "/no/output",
                                                'module_label': self.step.condition.outLabel})

            # Done with the report; persist it
            stepReport.persist(reportLocation)

        return
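# The two copy loops above repeat the same xrdcp-and-check pattern; a
# hypothetical helper (not part of the original executor, reusing the
# subprocess, logging and WMExecutionFailure names already used by this
# module) could factor it out:
def xrdcpOrRaise(source, targetPFN, errorLabel):
    """Copy a local file to EOS with xrdcp, raising on a non-zero exit code."""
    command = "env XRD_WRITERECOVERY=0 xrdcp -s -f %s %s" % (source, targetPFN)
    proc = subprocess.Popen(command, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    output = proc.communicate()[0]
    if proc.returncode > 0:
        msg = 'Failure during %s copy to EOS:\n   %s\n' % (errorLabel, output)
        logging.error(msg)
        raise WMExecutionFailure(60319, "AlcaHarvestFailure", msg)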
Example #58
    def testFailJobInput(self):
        """
        _testFailJobInput_

        Test the Jobs.FailInput DAO and verify that it doesn't affect other
        jobs/subscriptions that run over the same files.
        """
        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")
        bogusWorkflow = Workflow(spec="spec1.xml",
                                 owner="Steve",
                                 name="wf002",
                                 task="Test")
        testWorkflow.create()
        bogusWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        bogusFileset = Fileset(name="BogusFileset")
        testFileset.create()
        bogusFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)
        bogusSubscription = Subscription(fileset=bogusFileset,
                                         workflow=bogusWorkflow)
        testSubscription.create()
        bogusSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
        testFileA.create()
        testFileB.create()
        testFileC.create()

        testFileset.addFile([testFileA, testFileB, testFileC])
        bogusFileset.addFile([testFileA, testFileB, testFileC])
        testFileset.commit()
        bogusFileset.commit()

        testSubscription.completeFiles([testFileA, testFileB, testFileC])
        bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

        testJobGroup = JobGroup(subscription=testSubscription)
        bogusJobGroup = JobGroup(subscription=bogusSubscription)
        testJobGroup.create()
        bogusJobGroup.create()

        testJobA = Job(name="TestJobA",
                       files=[testFileA, testFileB, testFileC])
        testJobB = Job(name="TestJobB",
                       files=[testFileA, testFileB, testFileC])

        bogusJob = Job(name="BogusJob",
                       files=[testFileA, testFileB, testFileC])

        testJobA.create(group=testJobGroup)
        testJobB.create(group=testJobGroup)

        bogusJob.create(group=bogusJobGroup)

        testJobA.failInputFiles()
        testJobB.failInputFiles()

        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

        changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
        testJobB["state"] = "cleanout"
        changeStateAction.execute([testJobB])

        # Try again

        testJobA.failInputFiles()

        # Should now be failed
        self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0)
        self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3)
        self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0)

        # bogus should be unchanged
        self.assertEqual(len(bogusSubscription.filesOfStatus("Available")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Acquired")), 3)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Failed")), 0)
        self.assertEqual(len(bogusSubscription.filesOfStatus("Completed")), 0)

        return
Example #59
    def createTestJobGroup(self,
                           config,
                           name="TestWorkthrough",
                           filesetName="TestFileset",
                           specLocation="spec.xml",
                           error=False,
                           task="/TestWorkload/ReReco",
                           type="Processing"):
        """
        Creates a group of several jobs

        """

        myThread = threading.currentThread()

        testWorkflow = Workflow(spec=specLocation,
                                owner=self.OWNERDN,
                                name=name,
                                task=task,
                                owner_vogroup="",
                                owner_vorole="")
        testWorkflow.create()
        self.inject.execute(names=[name], injected=True)

        testWMBSFileset = Fileset(name=filesetName)
        testWMBSFileset.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(10, *[12312]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(10, *[12314]))
        testFileB.setLocation('malpaquet')

        testFileA.create()
        testFileB.create()

        testWMBSFileset.addFile(testFileA)
        testWMBSFileset.addFile(testFileB)
        testWMBSFileset.commit()
        testWMBSFileset.markOpen(0)

        outputWMBSFileset = Fileset(name='%sOutput' % filesetName)
        outputWMBSFileset.create()
        testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10)
        testFileC.addRun(Run(10, *[12312]))
        testFileC.setLocation('malpaquet')
        testFileC.create()
        outputWMBSFileset.addFile(testFileC)
        outputWMBSFileset.commit()
        outputWMBSFileset.markOpen(0)

        testWorkflow.addOutput('output', outputWMBSFileset)

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow,
                                        type=type)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        for i in range(0, self.nJobs):
            testJob = Job(name=makeUUID())
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob['retry_count'] = 1
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312, 12313])
            testJobGroup.add(testJob)

        testJobGroup.commit()

        changer = ChangeState(config)

        report1 = Report()
        report2 = Report()
        if error:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 "WMComponent_t/JobAccountant_t/fwjrs",
                                 "badBackfillJobReport.pkl")
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        else:
            path1 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'mergeReport1.pkl')
            path2 = os.path.join(WMCore.WMBase.getTestBase(),
                                 'WMComponent_t/TaskArchiver_t/fwjrs',
                                 'logCollectReport2.pkl')
        report1.load(filename=path1)
        report2.load(filename=path2)

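        # Walk the jobs through one failure/cooloff/retry cycle and then a
        # second failure that exhausts the retries and ends in cleanout.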
        changer.propagate(testJobGroup.jobs, 'created', 'new')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        for i in range(self.nJobs):
            if i < self.nJobs / 2:
                testJobGroup.jobs[i]['fwjr'] = report1
            else:
                testJobGroup.jobs[i]['fwjr'] = report2
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'jobcooloff', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'created', 'jobcooloff')
        changer.propagate(testJobGroup.jobs, 'executing', 'created')
        changer.propagate(testJobGroup.jobs, 'complete', 'executing')
        changer.propagate(testJobGroup.jobs, 'jobfailed', 'complete')
        changer.propagate(testJobGroup.jobs, 'retrydone', 'jobfailed')
        changer.propagate(testJobGroup.jobs, 'exhausted', 'retrydone')
        changer.propagate(testJobGroup.jobs, 'cleanout', 'exhausted')

        testSubscription.completeFiles([testFileA, testFileB])

        return testJobGroup
Example #60
    def createJobsLocationWise(self, fileset, endOfRun, dqmHarvestUnit,
                               lumiMask, goodRunList):
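        """
        _createJobsLocationWise_

        Group the fileset's files by location and run (honouring an optional
        run whitelist or lumi mask), then create either per-run or multi-run
        harvesting jobs for each location.
        """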

        myThread = threading.currentThread()
        fileset.loadData(parentage=0)
        allFiles = fileset.getFiles()

        # sort by location and run
        locationDict = {}
        runDict = {}
        for fileInfo in allFiles:

            locSet = frozenset(fileInfo['locations'])
            runSet = fileInfo.getRuns()

            if len(locSet) == 0:
                logging.error("File %s has no locations!", fileInfo['lfn'])
            if len(runSet) == 0:
                logging.error("File %s has no run information!",
                              fileInfo['lfn'])

            # Populate a dictionary with [location][run] so we can split jobs according to those different combinations
            if locSet not in locationDict.keys():
                locationDict[locSet] = {}

            fileInfo['runs'] = set()
            # Handle jobs with run whitelist/blacklist
            if goodRunList:
                runDict[fileInfo['lfn']] = set()
                for run in runSet:
                    if run.run in goodRunList:
                        runDict[fileInfo['lfn']].add(run)
                        if run.run in locationDict[locSet].keys():
                            locationDict[locSet][run.run].append(fileInfo)
                        else:
                            locationDict[locSet][run.run] = [fileInfo]
            elif lumiMask:
                # it has lumiMask, thus we consider only good run/lumis
                newRunSet = []
                for run in runSet:
                    if not isGoodRun(lumiMask, run.run):
                        continue
                    # then loop over lumis
                    maskedLumis = []
                    for lumi in run.lumis:
                        if not isGoodLumi(lumiMask, run.run, lumi):
                            continue
                        maskedLumis.append(lumi)

                    if not maskedLumis:
                        continue
                    maskedRun = Run(run.run, *maskedLumis)
                    newRunSet.append(maskedRun)

                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]
                if newRunSet:
                    runDict[fileInfo['lfn']] = newRunSet
            else:
                # no LumiList and no run white or black list
                runDict[fileInfo['lfn']] = runSet
                for run in runSet:
                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]

        # create separate jobs for different locations
        self.newGroup()
        self.jobCount = 0
        baseName = makeUUID()
        self.newGroup()

        if endOfRun:
            harvestType = "EndOfRun"
        else:
            harvestType = "Periodic"

        for location in locationDict.keys():

            if dqmHarvestUnit == "byRun":
                self.createJobByRun(locationDict, location, baseName,
                                    harvestType, runDict, endOfRun)
            else:
                self.createMultiRunJob(locationDict, location, baseName,
                                       harvestType, runDict, endOfRun)

        return
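# For orientation, a toy illustration (hypothetical data, not taken from the
# code above) of the {location set: {run number: [file info]}} structure that
# the grouping loop builds before any jobs are created:
exampleLocationDict = {
    frozenset(["T2_CH_CERN"]): {
        180001: ["fileA", "fileB"],   # files contributing lumis to run 180001
        180002: ["fileB"],
    },
    frozenset(["T1_US_FNAL_Disk"]): {
        180003: ["fileC"],
    },
}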