Example #1
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)

        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                     "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl,
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return
Example #2
    def setUp(self):
        """
        _setUp_

        Install the DBSBuffer schema into the database and connect to PhEDEx.
        """
        self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase = True)

        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer"],
                                useDefault = False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package = "WMComponent.DBSBuffer.Database",
                                logger = myThread.logger,
                                dbinterface = myThread.dbi)

        locationAction = daofactory(classname = "DBSBufferFiles.AddLocation")
        locationAction.execute(siteName = "srm-cms.cern.ch")
        locationAction.execute(siteName = "se.fnal.gov")

        self.testFilesA = []
        self.testFilesB = []
        self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
        self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()
        self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")

        return
Example #3
    def testLotsOfAncestors(self):
        """
        _testLotsOfAncestors_

        Create a file with 15 parents, each of which has 100 parents, to
        verify that the query that returns grandparents works correctly.
        """
        raise nose.SkipTest
        testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10,
                        checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
        testFileA.create()

        for i in xrange(15):
            testParent = File(lfn = makeUUID(), size = 1024, events = 10,
                              checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
            testParent.create()
            testFileA.addParent(testParent["lfn"])

            for j in xrange(100):
                testGParent = File(lfn = makeUUID(), size = 1024, events = 10,
                                   checksums = {"cksum": "1"}, locations = "se1.fnal.gov")
                testGParent.create()
                testParent.addParent(testGParent["lfn"])                

        assert len(testFileA.getAncestors(level = 2, type = "lfn")) == 1500, \
               "ERROR: Incorrect grand parents returned"
        
        return
Example #4
    def createJobGroups(self,
                        nSubs,
                        nJobs,
                        task,
                        workloadSpec,
                        site,
                        bl=[],
                        wl=[],
                        taskType='Processing',
                        name=None):
        """
        _createJobGroups_

        Creates a series of jobGroups for submissions
        """

        jobGroupList = []

        if name is None:
            name = makeUUID()

        testWorkflow = Workflow(spec=workloadSpec,
                                owner="tapas",
                                name=name,
                                task="basicWorkload/Production")
        testWorkflow.create()

        # Create subscriptions
        for _ in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type=taskType,
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name,
                           task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site,
                           bl=bl,
                           wl=wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #5
    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        #overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(self.uid,
                       self.subscription["id"],
                       self.output.id,
                       conn=self.getDBConn(),
                       transaction=self.existingTransaction())

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return
Example #6
    def setUp(self):
        """
        _setUp_

        Install the DBSBuffer schema into the database and connect to PhEDEx.
        """
        self.phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/test"
        self.dbsURL = "http://vocms09.cern.ch:8880/cms_dbs_int_local_yy_writer/servlet/DBSServlet"

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)

        self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMComponent.DBSBuffer.Database",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="DBSBufferFiles.AddLocation")
        locationAction.execute(siteName="srm-cms.cern.ch")
        locationAction.execute(siteName="se.fnal.gov")

        self.testFilesA = []
        self.testFilesB = []
        self.testDatasetA = "/%s/PromptReco-v1/RECO" % makeUUID()
        self.testDatasetB = "/%s/CRUZET11-v1/RAW" % makeUUID()
        self.phedex = PhEDEx({"endpoint": self.phedexURL}, "json")

        return
Example #7
    def testSubscription(self):
        """
        _testSubscription_

        Verify that the subscription API works.
        """
        datasetA = "/%s/WMCorePhEDExTest/RAW" % makeUUID()
        datasetB = "/%s/WMCorePhEDExTest/RECO" % makeUUID()
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetA)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        xmlData = XMLDrop.makePhEDExDrop(self.dbsTestUrl, datasetB)
        self.phedexApi.injectBlocks("T1_US_FNAL_MSS", xmlData)
        
        testSub = PhEDExSubscription([datasetA, datasetB], "T1_UK_RAL_MSS",
                                      "Saturn")
        xmlData = XMLDrop.makePhEDExXMLForDatasets(self.dbsTestUrl, 
                                                   testSub.getDatasetPaths())
        result = self.phedexApi.subscribe(testSub, xmlData)
        requestIDs = result["phedex"]["request_created"]

        self.assertEqual(len(requestIDs), 1,
                         "Error: Wrong number of request IDs")
        self.assertTrue("id" in requestIDs[0],
                        "Error: Missing request ID")
        return
Example #8
    def testTime(self):

        nUIDs     = 100000
        startTime = time.clock()
        for i in range(0,nUIDs):
            makeUUID()
        print("We can make %i UUIDs in %f seconds" %(nUIDs, time.clock() - startTime))
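A side note on the timing snippet above: time.clock() exists only on Python 2 (it was deprecated in Python 3.3 and removed in 3.8). A minimal Python 3 sketch of the same measurement, reusing the makeUUID import path shown in Example #27 and a hypothetical helper name, could look like this:

import time

from WMCore.Services.UUID import makeUUID

def timeUUIDs(nUIDs=100000):
    # time.perf_counter() is the usual replacement for the removed time.clock()
    startTime = time.perf_counter()
    for _ in range(nUIDs):
        makeUUID()
    print("We can make %i UUIDs in %f seconds" % (nUIDs, time.perf_counter() - startTime))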
Example #9
    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        # overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(
            self.uid,
            self.subscription["id"],
            self.output.id,
            conn=self.getDBConn(),
            transaction=self.existingTransaction(),
        )

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return
Example #10
    def createTestJob(self, subscriptionType="Merge"):
        """
        _createTestJob_

        Create a test job with two files as input.  This will also create the
        appropriate workflow, jobgroup and subscription.
        """
        testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))
        testFileA.create()
        testFileB.create()

        testJob = Job(name=makeUUID(), files=[testFileA, testFileB])
        testJob["couch_record"] = "somecouchrecord"
        testJob["location"] = "test.site.ch"
        testJob.create(group=testJobGroup)
        testJob.associateFiles()

        return testJob
Example #11
    def test_AutoIncrementCheck(self):
        """
        _AutoIncrementCheck_

        Test and see whether we can find and set the auto_increment values
        """
        myThread = threading.currentThread()
        if not myThread.dialect.lower() == "mysql":
            return

        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")
        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 1)

        incrementDAO.execute()

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 2)

        incrementDAO.execute(input=10)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 11)

        incrementDAO.execute(input=5)

        testJob = Job()
        testJob.create(group=testJobGroup)
        self.assertEqual(testJob.exists(), 12)

        return
Example #12
    def testTime(self):

        nUIDs = 100000
        startTime = time.clock()
        for i in range(0, nUIDs):
            makeUUID()
        print "We can make %i UUIDs in %f seconds" % (nUIDs,
                                                      time.clock() - startTime)
Example #13
    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob["mask"].addRunAndLumis(run=100, lumis=[101, 102])
        testJob["mask"].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob["mask"].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob["mask"].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob["mask"].filterRunLumisByMask([run0, run1])

        return
Example #14
    def createFilesWithChildren(self, moreParentFiles, acqEra):
        """
        _createFilesWithChildren_

        Create several parentless files and then create child files.
        """
        parentFiles = []
        childFiles = []
        
        baseLFN = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % (acqEra)
        for i in range(10):
            testFile = DBSBufferFile(lfn = baseLFN + makeUUID() + ".root", size = 1024,
                                     events = 20, checksums = {"cksum": 1})
            testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                                  appFam = "RAW", psetHash = "GIBBERISH",
                                  configContent = "MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RAW" % (acqEra))

            lumis = []
            for j in range(10):
                lumis.append((i * 10) + j)
            testFile.addRun(Run(143316, *lumis))            

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            parentFiles.append(testFile)

        baseLFN = "/store/data/%s/Cosmics/RECO/v1/000/143/316/" % (acqEra)
        for i in range(5):
            testFile = DBSBufferFile(lfn = baseLFN + makeUUID() + ".root", size = 1024,
                                     events = 20, checksums = {"cksum": 1})
            testFile.setAlgorithm(appName = "cmsRun", appVer = "CMSSW_3_1_1",
                                  appFam = "RECO", psetHash = "GIBBERISH",
                                  configContent = "MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RECO" % (acqEra))

            lumis = []
            for j in range(20):
                lumis.append((i * 20) + j)
            testFile.addRun(Run(143316, *lumis))            

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            testFile.addParents([parentFiles[i * 2]["lfn"],
                                 parentFiles[i * 2 + 1]["lfn"]])
            testFile.addParents([moreParentFiles[i * 2]["lfn"],
                                 moreParentFiles[i * 2 + 1]["lfn"]])
            childFiles.append(testFile)            

        return (parentFiles, childFiles)
Example #15
    def createJobGroups(self,
                        nSubs,
                        nJobs,
                        task,
                        workloadSpec,
                        site=None,
                        bl=[],
                        wl=[]):
        """
        Creates a series of jobGroups for submissions

        """

        jobGroupList = []

        testWorkflow = Workflow(spec=workloadSpec,
                                owner="mnorman",
                                name=makeUUID(),
                                task="basicWorkload/Production",
                                owner_vogroup='phgroup',
                                owner_vorole='cmsrole')
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(fileset=testFileset,
                                            workflow=testWorkflow,
                                            type="Processing",
                                            split_algo="FileBased")
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(name=name,
                           task=task,
                           nJobs=nJobs,
                           jobGroup=testJobGroup,
                           fileset=testFileset,
                           sub=testSubscription.exists(),
                           site=site,
                           bl=bl,
                           wl=wl)

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #16
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(i, *[45 + i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45 + i / 3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name="TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        return
Example #17
    def createJobGroups(self, nSubs, nJobs, task, workloadSpec, site=None, bl=[], wl=[]):
        """
        Creates a series of jobGroups for submissions

        """

        jobGroupList = []

        testWorkflow = Workflow(
            spec=workloadSpec,
            owner="tapas",
            name=makeUUID(),
            task="basicWorkload/Production",
            owner_vogroup="phgroup",
            owner_vorole="cmsrole",
        )
        testWorkflow.create()

        # Create subscriptions
        for i in range(nSubs):

            name = makeUUID()

            # Create Fileset, Subscription, jobGroup
            testFileset = Fileset(name=name)
            testFileset.create()
            testSubscription = Subscription(
                fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased"
            )
            testSubscription.create()

            testJobGroup = JobGroup(subscription=testSubscription)
            testJobGroup.create()

            # Create jobs
            self.makeNJobs(
                name=name,
                task=task,
                nJobs=nJobs,
                jobGroup=testJobGroup,
                fileset=testFileset,
                sub=testSubscription.exists(),
                site=site,
                bl=bl,
                wl=wl,
            )

            testFileset.commit()
            testJobGroup.commit()
            jobGroupList.append(testJobGroup)

        return jobGroupList
Example #18
    def getFiles(self, name, tier, nFiles=12, site="malpaquet", nLumis=1):
        """
        _getFiles_
        
        Create some dummy test files.
        """
        files = []

        (acqEra, procVer) = name.split("-")
        baseLFN = "/store/data/%s/Cosmics/RECO/%s/000/143/316/" % (acqEra,
                                                                   procVer)
        for f in range(nFiles):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root",
                                     size=1024,
                                     events=20,
                                     checksums={"cksum": 1})
            testFile.setAlgorithm(appName="cmsRun",
                                  appVer="CMSSW_3_1_1",
                                  appFam="RECO",
                                  psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-%s/RECO" % (acqEra, procVer))
            lumis = []
            for i in range(nLumis):
                lumis.append((f * 1000000) + i)
            testFile.addRun(Run(1, *lumis))
            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("0")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation(site)
            files.append(testFile)

        baseLFN = "/store/data/%s/Cosmics/RAW-RECO/%s/000/143/316/" % (acqEra,
                                                                       procVer)
        testFileChild = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root",
                                      size=1024,
                                      events=10,
                                      checksums={'cksum': 1})
        testFileChild.setAlgorithm(appName="cmsRun",
                                   appVer="CMSSW_3_1_1",
                                   appFam="RAW-RECO",
                                   psetHash="GIBBERISH",
                                   configContent="MOREGIBBERISH")
        testFileChild.setDatasetPath("/Cosmics/%s-%s/RAW-RECO" %
                                     (acqEra, procVer))
        testFileChild.addRun(Run(1, *[45]))
        testFileChild.create()
        testFileChild.setLocation(site)

        testFileChild.addParents([x['lfn'] for x in files])
        return files
Example #19
    def createResubmitSpec(self, serverUrl, couchDB):
        """
        _createResubmitSpec_
        Create a bogus resubmit workload.
        """
        self.site = "cmssrm.fnal.gov"
        workload = WMWorkloadHelper(WMWorkload("TestWorkload"))
        reco = workload.newTask("reco")
        workload.setOwnerDetails(name = "evansde77", group = "DMWM")

        # first task uses the input dataset
        reco.addInputDataset(primary = "PRIMARY", processed = "processed-v1", tier = "TIER1")
        reco.data.input.splitting.algorithm = "File"
        reco.setTaskType("Processing")
        cmsRunReco = reco.makeStep("cmsRun1")
        cmsRunReco.setStepType("CMSSW")
        reco.applyTemplates()
        cmsRunRecoHelper = cmsRunReco.getTypeHelper()
        cmsRunRecoHelper.addOutputModule("outputRECO",
                                        primaryDataset = "PRIMARY",
                                        processedDataset = "processed-v2",
                                        dataTier = "TIER2",
                                        lfnBase = "/store/dunkindonuts",
                                        mergedLFNBase = "/store/kfc")
        
        dcs = DataCollectionService(url = serverUrl, database = couchDB)

        def getJob(workload):
            job = Job()
            job["task"] = workload.getTask("reco").getPathName()
            job["workflow"] = workload.name()
            job["location"] = self.site
            job["owner"] = "evansde77"
            job["group"] = "DMWM"
            return job

        testFileA = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
        testFileA.setLocation([self.site])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
        testFileB.setLocation([self.site])
        testFileB.addRun(Run(1, 3, 4))
        testJobA = getJob(workload)
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        
        dcs.failedJobs([testJobA])
        topLevelTask = workload.getTopLevelTask()[0]
        workload.truncate("Resubmit_TestWorkload", topLevelTask.getPathName(), 
                          serverUrl, couchDB)
                                  
        return workload
Example #20
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45+i]))
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        self.singleFileFileset.addFile(newFile)

        self.multipleFileLumiset = Fileset(name = "TestFileset3")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45+i/3]))
            self.multipleFileLumiset.addFile(newFile)

        self.singleLumiFileset = Fileset(name = "TestFileset4")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45]))
            self.singleLumiFileset.addFile(newFile)
            

        testWorkflow = Workflow()
        self.multipleFileSubscription  = Subscription(fileset = self.multipleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleFileSubscription    = Subscription(fileset = self.singleFileFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.multipleLumiSubscription  = Subscription(fileset = self.multipleFileLumiset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")
        self.singleLumiSubscription    = Subscription(fileset = self.singleLumiFileset,
                                                      workflow = testWorkflow,
                                                      split_algo = "EndOfRun",
                                                      type = "Processing")


        return
Example #21
def createFile():
    """
    _createFile_

    Create a file with some random metadata.
    """
    newFile = File(lfn = makeUUID(), size = random.randrange(1024, 1048576, 1024),
                   events = random.randrange(10, 100000, 50),
                   parents = [File(lfn = makeUUID())],
                   locations = makeUUID())
    newFile["first_event"] = 0
    newFile["last_event"] = 0
    newFile["id"] = 1
    return newFile
Example #22
    def testListRunningJobs(self):
        """
        _testListRunningJobs_

        Test the ListRunningJobs DAO.
        """
        testWorkflow = Workflow(spec = makeUUID(), owner = "Steve",
                                name = makeUUID(), task="Test")
        testWorkflow.create()

        testFileset = Fileset(name = "TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset = testFileset,
                                        workflow = testWorkflow,
                                        type = "Processing")
        testSubscription.create()

        testJobGroup = JobGroup(subscription = testSubscription)
        testJobGroup.create()

        testJobA = Job(name = makeUUID(), files = [])
        testJobA["couch_record"] = makeUUID()
        testJobA.create(group = testJobGroup)
        testJobA["state"] = "executing"

        testJobB = Job(name = makeUUID(), files = [])
        testJobB["couch_record"] = makeUUID()
        testJobB.create(group = testJobGroup)
        testJobB["state"] = "complete"

        testJobC = Job(name = makeUUID(), files = [])
        testJobC["couch_record"] = makeUUID()
        testJobC.create(group = testJobGroup)        
        testJobC["state"] = "new"

        changeStateAction = self.daoFactory(classname = "Jobs.ChangeState")
        changeStateAction.execute(jobs = [testJobA, testJobB, testJobC])

        runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs")
        runningJobs = runningJobsAction.execute()

        assert len(runningJobs) == 2, \
               "Error: Wrong number of running jobs returned."

        for runningJob in runningJobs:
            if runningJob["job_name"] == testJobA["name"]:
                assert runningJob["state"] == testJobA["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobA["couch_record"], \
                       "Error: Running job has wrong couch record."
            else:
                assert runningJob["job_name"] == testJobC["name"], \
                       "Error: Running job has wrong name."
                assert runningJob["state"] == testJobC["state"], \
                       "Error: Running job has wrong state."
                assert runningJob["couch_record"] == testJobC["couch_record"], \
                       "Error: Running job has wrong couch record."                

        return
Example #23
def createFile():
    """
    _createFile_

    Create a file with some random metadata.
    """
    newFile = File(lfn=makeUUID(),
                   size=random.randrange(1024, 1048576, 1024),
                   events=random.randrange(10, 100000, 50),
                   parents=[File(lfn=makeUUID())],
                   locations=makeUUID())
    newFile["first_event"] = 0
    newFile["last_event"] = 0
    newFile["id"] = 1
    return newFile
Example #24
class FileAndEventBased(JobFactory):
    """
    Split jobs by number of events
    """
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event-based splitting algorithm.  All available files are split
        into a set number of events per job.
        """
        jobGroups = []
        fileset = self.subscription.availableFiles()

        #  //
        # // get the event total
        #//
        eventsPerJob = int(kwargs.get("events_per_job", 5000))

        try:
            selectionAlgorithm = kwargs['selection_algorithm']
        except KeyError:
            selectionAlgorithm = None
        carryOver = 0

        for f in fileset:
            if selectionAlgorithm:
                if not selectionAlgorithm(f):
                    self.subscription.completeFiles([f])
                    continue
            self.newGroup()
            eventsInFile = int(f["events"])

            if eventsInFile == 0:
                self.newJob(name=makeUUID())
                self.currentJob.addFile(f)
                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob, 0)
                continue

            currentEvent = 0
            while currentEvent < eventsInFile:
                self.newJob(name=makeUUID())
                self.currentJob.addFile(f)
                self.currentJob["mask"].setMaxAndSkipEvents(
                    eventsPerJob, currentEvent)
                currentEvent += eventsPerJob

        return
Example #25
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name = "TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size = 1000, events = 100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name = "TestFileset2")
        newFile = File("/some/file/name", size = 1000, events = 100)
        newFile.setLocation('blenheim')
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset,
                                                     workflow = testWorkflow,
                                                     split_algo = "FileBased",
                                                     type = "Processing")
        self.singleFileSubscription = Subscription(fileset = self.singleFileFileset,
                                                   workflow = testWorkflow,
                                                   split_algo = "FileBased",
                                                   type = "Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        return
Example #26
    def getBlock(self, newFile, dasBlocks, location, das):
        """
        _getBlock_

        This gets a new block by checking whether there is a
        pre-existing block.
        """

        for block in dasBlocks:
            if not self.isBlockOpen(newFile = newFile, block = block):
                # Then the block can't fit the file
                # Close the block
                block.status = 'Pending'
                self.blockCache[block.getName()] = block
                dasBlocks.remove(block.getName())
            else:
                # Load it out of the cache
                currentBlock = self.blockCache.get(block.getName())
                return currentBlock
        # If there are no open blocks
        # Or we run out of blocks
        blockname = '%s#%s' % (newFile['datasetPath'],
                               makeUUID())
        newBlock = DBSBlock(name = blockname,
                            location = location, das = das)
        self.addNewBlock(block = newBlock)
        dasBlocks.append(blockname)
        return newBlock
Example #27
def main():
    """main function for testing"""
    from WMCore.DataStructs.Job import Job
    from WMCore.DataStructs.File import File
    from WMCore.DataStructs.Run import Run
    from WMCore.DataStructs.JobPackage import JobPackage
    from WMCore.Services.UUID import makeUUID
    from WMCore.WMSpec.Makers.TaskMaker import TaskMaker

    factory = HarvestingWorkloadFactory()
    workload = factory("derp", getTestArguments())

    task = workload.getTask('Harvesting')

    job = Job("SampleJob")
    job["id"] = makeUUID()
    job["task"] = task.getPathName()
    job["workflow"] = workload.name()

    file = File(lfn="/store/relval/CMSSW_3_8_2/RelValMinBias/GEN-SIM-RECO/MC_38Y_V9-v1/0019/FEC5BB4D-BFAF-DF11-A52A-001A92810AD2.root")
    job.addFile(file)

    jpackage = JobPackage()
    jpackage[1] = job

    import pickle
    
    handle = open("%s/JobPackage.pkl" % os.getcwd(), 'w')
    pickle.dump(jpackage, handle)
    handle.close()

    taskMaker = TaskMaker(workload, os.getcwd())
    taskMaker.skipSubscription = True
    taskMaker.processWorkload()
    task.build(os.getcwd())
Example #28
    def getBlock(self, newFile, dasBlocks, location, das):
        """
        _getBlock_

        This gets a new block by checking whether there is a
        pre-existing block.
        """

        for blockName in dasBlocks:
            block = self.blockCache.get(blockName)
            if not self.isBlockOpen(newFile=newFile, block=block):
                # Then the block can't fit the file
                # Close the block
                block.status = 'Pending'
                self.blockCache[blockName] = block
                dasBlocks.remove(blockName)
            else:
                # Load it out of the cache
                currentBlock = blockName
                return currentBlock
        # If there are no open blocks
        # Or we run out of blocks
        blockname = '%s#%s' % (newFile['datasetPath'], makeUUID())
        newBlock = DBSBlock(name=blockname, location=location, das=das)
        self.addNewBlock(block=newBlock)
        dasBlocks.append(blockname)
        return newBlock
Example #29
    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_
        
        Creates a large group of files for testing
        """
        testFileset = Fileset(name = "TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(), size = 1000, events = 100,
                           locations = set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()
            
        testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman",
                                name = "wf003", task="Test" )
        testWorkflow.create()

        largeSubscription = Subscription(fileset = testFileset,
                                         workflow = testWorkflow,
                                         split_algo = "FileBased",
                                         type = "Processing")
        largeSubscription.create()

        return largeSubscription
Example #30
    def testD_Profile(self):
        """
        _Profile_

        Profile with cProfile and time various pieces
        """
        return
        config = self.createConfig()

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 500
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)
        cProfile.runctx("testDBSUpload.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats(0.2)

        return
Example #31
    def testC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """

        return

        import cProfile, pstats

        myThread = threading.currentThread()

        name    = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name = name, config = config,
                                         nSubs = 10, nJobs = 1000, nFiles = 10)

        cleanCouch = CleanCouchPoller(config = config)
        cleanCouch.setup()

        cProfile.runctx("cleanCouch.algorithm()", globals(), locals(), filename = "testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()
        return
Example #32
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Implement merge algorithm for the subscription provided

        """
        fileset = list(self.subscription.availableFiles())

        mergeSize = int(kwargs['merge_size'])
        overflow  = bool(kwargs.get('all_files', False))
        fileset.sort()

        accumSize = 0
        jobFiles = Fileset()
        locationDict = self.sortByLocation()
        for location in locationDict:
            baseName = makeUUID()
            self.newGroup()
            for f in locationDict[location]:
                accumSize += f['size']
                jobFiles.addFile(f)
                if accumSize >= mergeSize:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
                    accumSize = 0
                    jobFiles = Fileset()

            if len(jobFiles) > 0:
                if overflow:
                    self.newJob(name = '%s-%s' % (baseName, len(self.currentGroup.jobs) + 1),
                                      files = jobFiles)
                    self.currentJob["mask"].setMaxAndSkipEvents(-1, 0)
Example #33
    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")


        return
Example #34
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name="%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id=streamer['id'], lfn=streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave=False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - 45s/evt reco speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (factor 4)
        jobTime = 300 + jobSize / 500000 + jobEvents * 45 + (jobSize * 4 *
                                                             2) / 5000000
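        # Illustrative sanity check with hypothetical numbers: for a 5 GB job
        # (jobSize = 5e9 bytes) with 100 events, the estimate above works out to
        #   300 + 5e9/500000 + 100*45 + (5e9*4*2)/5000000
        #   = 300 + 10000 + 4500 + 8000 = 22800 seconds (about 6.3 hours).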
        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                             disk=(jobSize * 5) / 1024,
                                             memory=memoryRequirement)

        return
Example #35
    def getBlock(self, newFile, location, das, skipOpenCheck=False):
        """
        _getBlock_

        Retrieve a block if one exists and is open.  If no open block is found,
        create and return a new one.
        """
        if das in self.dasCache and location in self.dasCache[das]:
            for blockName in self.dasCache[das][location]:
                block = self.blockCache.get(blockName)
                if not self.isBlockOpen(newFile=newFile,
                                        block=block) and not skipOpenCheck:
                    # Block isn't open anymore.  Mark it as pending so that it gets
                    # uploaded.
                    block.setPendingAndCloseBlock()
                    self.blockCache[blockName] = block
                else:
                    return block

        # A suitable open block does not exist.  Create a new one.
        blockname = "%s#%s" % (newFile["datasetPath"], makeUUID())
        newBlock = DBSBlock(name=blockname,
                            location=location,
                            das=das,
                            workflow=newFile["workflow"])
        self.addNewBlock(block=newBlock)
        return newBlock
Example #36
    def testFileset(self):
        """
        _testFileset_

        Verify that converting an ACDC fileset to a DataStructs fileset works
        correctly.
        """
        testCollection = CouchCollection(database = self.testInit.couchDbName,
                                         url = self.testInit.couchUrl, 
                                         name = "Thunderstruck")
        testCollection.setOwner(self.owner)
        testFileset = CouchFileset(database = self.testInit.couchDbName,
                                   url = self.testInit.couchUrl,
                                   name = "TestFileset")
        testCollection.addFileset(testFileset)

        testFiles = {}
        for i in range(5):
            lfn = makeUUID()
            testFile = File(lfn = lfn, size = random.randint(1024, 4096),
                            events = random.randint(1024, 4096))
            testFiles[lfn] = testFile
            testFileset.add([testFile])

        for file in testFileset.fileset().files:
            self.assertTrue(file["lfn"] in testFiles.keys(),
                            "Error: File missing.")
            self.assertEqual(file["events"], testFiles[file["lfn"]]["events"],
                             "Error: Wrong number of events.")
            self.assertEqual(file["size"], testFiles[file["lfn"]]["size"],
                             "Error: Wrong file size.")
        return
Example #37
    def test05(self):
        """
        _test05_

        Test multi lumis express merges with holes

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)

        time.sleep(1)

        mySplitArgs['maxLatency'] = 1
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        return
Example #38
    def __call__(self):
        """
        __call__

        Generate some random data
        """

        # Generate somewhere between 10 and 2000 files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = random.randint(10, 2000)
        name = name.replace('-', '_')
        name = '%s-v0' % name
        files = self.getFiles(name=name, nFiles=nFiles)

        print "Inserting %i files for dataset %s" % (nFiles * 2, name)

        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

        # Repeat just to make sure
        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

        return
Example #39
    def createLargeFileBlock(self):
        """
        _createLargeFileBlock_

        Creates a large group of files for testing
        """
        testFileset = Fileset(name="TestFilesetX")
        testFileset.create()
        for i in range(5000):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.create()
            testFileset.addFile(newFile)
        testFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="mnorman",
                                name="wf003",
                                task="Test")
        testWorkflow.create()

        largeSubscription = Subscription(fileset=testFileset,
                                         workflow=testWorkflow,
                                         split_algo="FileBased",
                                         type="Processing")
        largeSubscription.create()

        return largeSubscription
Example #40
    def createJob(self, streamerList, jobEvents, jobSize, memoryRequirement, numberOfCores = 1):
        """
        _createJob_

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        if numberOfCores > 1:
            self.currentJob.addBaggageParameter("numberOfCores", numberOfCores)

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization
        #   - 0.5MB/s repack speed
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - RAW on local disk (factor 1)
        jobTime = 300 + jobSize/500000 + (jobSize*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = jobTime, disk = jobSize/1024, memory = memoryRequirement)

        return
Example #41
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.multipleFileFileset = Fileset(name="TestFileset1")
        for i in range(10):
            newFile = File(makeUUID(), size=1000, events=100)
            newFile.setLocation('blenheim')
            newFile.setLocation('malpaquet')
            self.multipleFileFileset.addFile(newFile)

        self.singleFileFileset = Fileset(name="TestFileset2")
        newFile = File("/some/file/name", size=1000, events=100)
        newFile.setLocation('blenheim')
        self.singleFileFileset.addFile(newFile)

        testWorkflow = Workflow()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FileBased",
            type="Processing")

        #self.multipleFileSubscription.create()
        #self.singleFileSubscription.create()

        return
Example #42
    def testE_TestNonProxySplitting(self):
        """
        _TestNonProxySplitting_

        Test and see if we can split things without a proxy.
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        config.JobCreator.workerThreads = 1

        name         = makeUUID()
        workloadName = 'TestWorkload'

        workload = self.createWorkload(workloadName = workloadName)

        workloadPath = os.path.join(self.testDir, 'workloadTest', 'TestWorkload', 'WMSandbox', 'WMWorkload.pkl')

        self.stuffWMBS(workflowURL = workloadPath, name = name)

        testJobCreator = JobCreatorPoller(config = config)

        testJobCreator.algorithm()

        getJobsAction = self.daoFactory(classname = "Jobs.GetAllJobs")
        result = getJobsAction.execute(state = 'Created', jobType = "Processing")

        self.assertEqual(len(result), 1)

        result = getJobsAction.execute(state = 'Created', jobType = "Merge")
        self.assertEqual(len(result), 0)

        return
Example #43
    def getBlock(self, newFile, location, skipOpenCheck=False):
        """
        _getBlock_

        Retrieve a block if one exists with a matching datasetpath/location and is open.
        If no such block is found, create and return a new one.
        """
        datasetpath = newFile["datasetPath"]

        for block in self.blockCache.values():
            if datasetpath == block.getDatasetPath() and location == block.getLocation():
                if not self.isBlockOpen(newFile=newFile, block=block) and not skipOpenCheck:
                    # Block isn't open anymore.  Mark it as pending so that it gets uploaded.
                    block.setPendingAndCloseBlock()
                else:
                    return block

        # A suitable open block does not exist.  Create a new one.
        blockname = "%s#%s" % (datasetpath, makeUUID())
        newBlock = DBSBufferBlock(name=blockname,
                                  location=location,
                                  datasetpath=datasetpath)
        self.blockCache[blockname] = newBlock
        return newBlock
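The look-up-or-create logic in getBlock can be sketched standalone; the plain dict and the is_open flag below are illustrative stand-ins, not the DBSBufferBlock API.

# Standalone sketch of the open-block cache pattern (all names hypothetical).
import uuid

blockCache = {}   # block name -> (datasetpath, location, is_open)

def get_or_create_block(datasetpath, location):
    for name, (dpath, loc, is_open) in blockCache.items():
        if dpath == datasetpath and loc == location and is_open:
            return name
    # No suitable open block exists: create one named <datasetpath>#<uuid>.
    name = "%s#%s" % (datasetpath, uuid.uuid4())
    blockCache[name] = (datasetpath, location, True)
    return name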
Example #44
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        A time delay job splitting algorithm, will shove all unacquired
        files into a new job if the trigger_time has been passed
        """

        #  //
        # // get the fileset
        #//
        fileset = self.subscription.getFileset()
        trigger_time = int(kwargs['trigger_time'])
        if (trigger_time < time.time()):
            availFiles = self.subscription.availableFiles()
            if (len(availFiles) == 0):
                # no files to acquire
                return []

            baseName = makeUUID()
            self.newGroup()
            self.newJob(name='%s-endofrun' % (baseName, ))

            for f in availFiles:
                self.currentJob.addFile(f)
Example #45
    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           rand=False):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        parentFile = File('%s_parent' % (baseName),
                          size=1000,
                          events=100,
                          locations=set(["somese.cern.ch"]))
        parentFile.create()
        for i in range(nFiles):
            newFile = File(lfn='%s_%i' % (baseName, i),
                           size=1000,
                           events=100,
                           locations="somese.cern.ch")
            lumis = []
            for lumi in range(lumisPerFile):
                if rand:
                    lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                else:
                    lumis.append((100 * i) + lumi)
            newFile.addRun(Run(i, *lumis))
            newFile.create()
            newFile.addParent(parentFile['lfn'])
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = File(lfn='%s_%i_2' % (baseName, i),
                               size=1000,
                               events=100,
                               locations="otherse.cern.ch")
                lumis = []
                for lumi in range(lumisPerFile):
                    if rand:
                        lumis.append(random.randint(1000 * i, 1000 * (i + 1)))
                    else:
                        lumis.append((100 * i) + lumi)
                newFile.addRun(Run(i, *lumis))
                newFile.create()
                newFile.addParent(parentFile['lfn'])
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        return testSubscription
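To see what the rand=False branch above generates, here is the lumi numbering for two files with three lumis each (arbitrary values):

# Illustration of the deterministic lumi numbering for nFiles=2, lumisPerFile=3.
for i in range(2):
    lumis = [(100 * i) + lumi for lumi in range(3)]
    print("file %d -> run %d, lumis %s" % (i, i, lumis))
# file 0 -> run 0, lumis [0, 1, 2]
# file 1 -> run 1, lumis [100, 101, 102]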
Example #46
    def testC_FailTest(self):
        """
        _FailTest_

        THIS TEST IS DANGEROUS!
        Figure out what happens when we trigger rollbacks
        """
        myThread = threading.currentThread()
        config = self.createConfig()
        config.DBSUpload.abortStepTwo = True

        originalOut = sys.stdout
        originalErr = sys.stderr

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = "/%s/%s/%s" % (name, name, tier)

        testDBSUpload = DBSUploadPoller(config=config)

        try:
            testDBSUpload.algorithm()
        except Exception, ex:
            pass
Example #47
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        A time delay job splitting algorithm, will shove all unacquired
        files into a new job if the trigger_time has been passed
        """

        #  //
        # // get the fileset
        #//
        fileset = self.subscription.getFileset()
        trigger_time = int(kwargs['trigger_time'])  # cast so the numeric comparison below is valid
        if (trigger_time < time.time()):
            availFiles = self.subscription.availableFiles()
            if (len(availFiles) == 0):
                # no files to acquire
                return []
                
            baseName = makeUUID()
            self.newGroup()
            self.newJob(name = '%s-endofrun' % (baseName,))
            
            for f in availFiles:                    
                self.currentJob.addFile(f)
Example #48
    def testF_DBSUploadQueueSizeCheckForAlerts(self):
        """
        Test will not trigger a real alert being sent unless doing some
        mocking of the methods used during DBSUploadPoller.algorithm() ->
        DBSUploadPoller.uploadBlocks() method.
        As done here, it probably can't be deterministic, yet the feature
        shall be checked.

        """
        sizeLevelToTest = 1
        myThread = threading.currentThread()
        config = self.createConfig()
        # threshold / value to check
        config.DBSUpload.alertUploadQueueSize = sizeLevelToTest

        # without this uploadBlocks method returns immediately
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = sizeLevelToTest + 1
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = "/%s/%s/%s" % (name, name, tier)

        # load components that are necessary to check status
        # (this seems necessary, else some previous tests started failing)
        factory = WMFactory("dbsUpload", "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)
        testDBSUpload = DBSUploadPoller(config)
        # this is finally where the action (alert) should be triggered from
        testDBSUpload.algorithm()

        return
Example #50
    def __call__(self):
        """
        __call__

        Generate some random data
        """

        # Generate somewhere between ten and two thousand files
        name = "ThisIsATest_%s" % (makeUUID())
        nFiles = random.randint(10, 2000)
        name = name.replace('-', '_')
        name = '%s-v0' % name
        files = self.getFiles(name = name, nFiles = nFiles)

        print "Inserting %i files for dataset %s" % (nFiles * 2, name)

        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

        # Repeat just to make sure
        try:
            self.dbsUploader.algorithm()
        except:
            self.dbsUploader.close()
            raise

        
        return
Example #52
    def createSubscription(self, nFiles, lumisPerFile, twoSites=False, nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        for i in range(nFiles):
            newFile = self.createFile("%s_%i" % (baseName, i), nEventsPerFile, i, lumisPerFile, "somese.cern.ch")
            newFile.create()
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile("%s_%i_2" % (baseName, i), nEventsPerFile, i, lumisPerFile, "otherse.cern.ch")
                newFile.create()
                testFileset.addFile(newFile)
        testFileset.commit()

        testSubscription = Subscription(
            fileset=testFileset, workflow=self.testWorkflow, split_algo="EventAwareLumiBased", type="Processing"
        )
        testSubscription.create()

        return testSubscription
Example #53
    def createSubscription(self,
                           nFiles,
                           lumisPerFile,
                           twoSites=False,
                           nEventsPerFile=100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i),
                                          nEventsPerFile, i, lumisPerFile,
                                          'malpaquet')
                testFileset.addFile(newFile)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")

        return testSubscription
Example #54
    def atestC_Profile(self):
        """
        _Profile_

        DON'T RUN THIS!
        """

        return

        import cProfile, pstats

        myThread = threading.currentThread()

        name = makeUUID()

        config = self.getConfig()

        jobList = self.createGiantJobSet(name=name,
                                         config=config,
                                         nSubs=10,
                                         nJobs=1000,
                                         nFiles=10)

        testTaskArchiver = TaskArchiverPoller(config=config)

        cProfile.runctx("testTaskArchiver.algorithm()",
                        globals(),
                        locals(),
                        filename="testStats.stat")

        p = pstats.Stats('testStats.stat')
        p.sort_stats('cumulative')
        p.print_stats()

        return
Example #55
    def create(self, group):
        """
        _create_

        Write the job to the database.
        """
        if self["id"] != None:
            return

        existingTransaction = self.beginTransaction()

        self["jobgroup"] = group.id

        if self["name"] == None:
            self["name"] = makeUUID()

        jobAction = self.daofactory(classname="Jobs.New")
        jobAction.execute(jobgroup=self["jobgroup"],
                          name=self["name"],
                          couch_record=self["couch_record"],
                          location=self["location"],
                          cache_dir=self['cache_dir'],
                          outcome=self['outcome'],
                          fwjr=self['fwjr'],
                          conn=self.getDBConn(),
                          transaction=self.existingTransaction())

        self.exists()

        self['mask'].save(jobID=self['id'])

        self.associateFiles()
        self.commitTransaction(existingTransaction)
        return
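The beginTransaction/commitTransaction calls above follow a nested-transaction convention: only the call that actually opened the transaction commits it. The sketch below shows that convention under assumed semantics; it is not WMCore's implementation and all names are made up.

# Assumed sketch of the nested-transaction convention used by create().
class TransactionDemo(object):
    def __init__(self):
        self.open = False

    def beginTransaction(self):
        existing = self.open          # was a transaction already open?
        if not existing:
            self.open = True          # a real implementation would start a DB transaction
        return existing

    def commitTransaction(self, existingTransaction):
        if not existingTransaction:   # only the outermost caller commits
            self.open = False         # a real implementation would commit here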
Example #56
    def createJob(self, streamerList, jobEvents, jobSize, timePerEvent, sizePerEvent, memoryRequirement):
        """
        _createJob_

        create an express job processing
        the passed in list of streamers

        """
        if not self.createdGroup:
            self.newGroup()
            self.createdGroup = True

        self.newJob(name = "%s-%s" % (self.jobNamePrefix, makeUUID()))

        for streamer in streamerList:
            f = File(id = streamer['id'],
                     lfn = streamer['lfn'])
            f.setLocation(streamer['location'], immediateSave = False)
            self.currentJob.addFile(f)

        # job time based on
        #   - 5 min initialization (twice)
        #   - 0.5MB/s repack speed
        #   - reco with timePerEvent
        #   - checksum calculation at 5MB/s
        #   - stageout at 5MB/s
        # job disk based on
        #   - streamer or RAW on local disk (factor 1)
        #   - FEVT/ALCARECO/DQM on local disk (sizePerEvent)
        jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
        self.currentJob.addResourceEstimates(jobTime = min(jobTime, 47*3600),
                                             disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000),
                                             memory = memoryRequirement)

        return
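The express estimate can be checked the same way; every input below is invented, and the min() caps are the ones used in the code above.

# Back-of-the-envelope check of the express resource estimate (all inputs assumed).
jobSize = 2 * 1000 * 1000 * 1000     # 2 GB of streamers
jobEvents = 50000
timePerEvent = 10                    # seconds per event
sizePerEvent = 300                   # bytes per event

jobTime = 600 + jobSize/500000 + jobEvents*timePerEvent + (jobEvents*sizePerEvent*2)/5000000
jobTime = min(jobTime, 47*3600)                              # capped at 47 hours
disk = min(jobSize/1024 + jobEvents*sizePerEvent, 20000000)  # capped at 20 million KB
print("jobTime=%ds disk=%dKB" % (jobTime, disk))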
Example #57
    def createSubscription(self, nFiles, lumisPerFile, twoSites = False, nEventsPerFile = 100):
        """
        _createSubscription_

        Create a subscription for testing
        """

        baseName = makeUUID()

        testFileset = Fileset(name = baseName)
        for i in range(nFiles):
            newFile = self.createFile('%s_%i' % (baseName, i), nEventsPerFile,
                                      i, lumisPerFile, 'blenheim')
            testFileset.addFile(newFile)
        if twoSites:
            for i in range(nFiles):
                newFile = self.createFile('%s_%i_2' % (baseName, i), nEventsPerFile,
                                          i, lumisPerFile, 'malpaquet')
                testFileset.addFile(newFile)


        testSubscription = Subscription(fileset = testFileset,
                                         workflow = self.testWorkflow,
                                         split_algo = "EventAwareLumiBased",
                                         type = "Processing")

        return testSubscription
Example #58
    def testUUID(self):

        listOfIDs = []

        splitID = None

        for i in range(0, 1000):
            tmpID = makeUUID()
            if not splitID:
                splitID = tmpID.split('-')
            tmpSplit = tmpID.split('-')
            self.assertEqual(tmpSplit[1], splitID[1], "Second component of UUID not the same %s != %s" \
                             %(tmpSplit[1], splitID[1]))
            self.assertEqual(tmpSplit[2], splitID[2], "Third component of UUID not the same %s != %s" \
                             %(tmpSplit[2], splitID[2]))
            self.assertEqual(tmpSplit[4], splitID[4], "Fifth component of UUID not the same %s != %s" \
                             %(tmpSplit[4], splitID[4]))
            self.assertEqual(type(tmpID), str)
            self.assertEqual(
                listOfIDs.count(tmpID), 0,
                "UUID repeated!  %s found in list %i times!" %
                (tmpID, listOfIDs.count(tmpID)))
            listOfIDs.append(tmpID)

        return
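The invariants this test checks are what a time-based UUID provides, assuming makeUUID wraps uuid.uuid1 (an assumption worth verifying against WMCore itself): in back-to-back calls the time_mid, time_hi_version and node fields stay fixed while the leading time_low field varies, so the IDs remain unique but partly stable.

# Quick standalone check of the same invariants with uuid.uuid1 (assumption noted above).
import uuid

a = str(uuid.uuid1()).split('-')
b = str(uuid.uuid1()).split('-')
print(a[1] == b[1], a[2] == b[2], a[4] == b[4])   # expected: True True True
print(a[0] != b[0])                               # expected: True, so the IDs differ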