Esempio n. 1
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([181061, 181175])
        Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(250, int(wq_jobs))
Esempio n. 2
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
            blocks = [] # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Esempio n. 3
0
    def notestParentProcessing(self):
        # Does not work with a RAW dataset, need a different workload
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(
            parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction(
            'testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                import pdb
                pdb.set_trace()
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Esempio n. 4
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

        # invalid dbs url
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

        # invalid dataset name
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666]) # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
Esempio n. 5
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec,
                              task)

        # invalid dataset name
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET

        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec,
                              task)
Esempio n. 6
0
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist"""
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        jobs += runLumis[run]
            self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
Esempio n. 7
0
    def getProcessingACDCSpec(self,
                              splittingAlgo='LumiBased',
                              splittingArgs=None,
                              setLocationFlag=False):
        """
        _getProcessingACDCSpec_

        Get a ACDC spec for the processing task of a ReReco workload
        """
        if splittingArgs is None:
            splittingArgs = {'lumis_per_job': 8}
        factory = ReRecoWorkloadFactory()
        rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        rerecoArgs["Requestor"] = self.user
        rerecoArgs["Group"] = self.group
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            self.workflowName, rerecoArgs)
        Tier1ReRecoWorkload.truncate('ACDC_%s' % self.workflowName,
                                     '/%s/DataProcessing' % self.workflowName,
                                     self.couchUrl, self.acdcDBName)
        Tier1ReRecoWorkload.setJobSplittingParameters(
            '/ACDC_%s/DataProcessing' % self.workflowName, splittingAlgo,
            splittingArgs)
        Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
        if setLocationFlag:
            Tier1ReRecoWorkload.setTrustLocationFlag(setLocationFlag)
        #            Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
        return Tier1ReRecoWorkload
Esempio n. 8
0
 def testRunWhitelist(self):
     """
     ReReco lumi split with Run whitelist
     This test may not do much of anything anymore since listRunLumis is not in DBS3
     """
     # get files with multiple runs
     Globals.GlobalParams.setNumOfRunsPerFile(2)
     # a large number of lumis to ensure we get multiple runs
     Globals.GlobalParams.setNumOfLumisPerBlock(10)
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setRunWhitelist([2, 3])
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                              inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         # Check number of jobs in element match number for
         # dataset in run whitelist
         wq_jobs = 0
         for unit in units:
             wq_jobs += unit['Jobs']
             runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
             print "runLumis", runLumis
             for run in runLumis:
                 if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                     self.assertEqual(runLumis[run], None)  # This is what it is with DBS3 unless we calculate it
         self.assertEqual(75, int(wq_jobs))
Esempio n. 9
0
    def getMergeACDCSpec(self,
                         splittingAlgo='ParentlessMergeBySize',
                         splittingArgs=None):
        """
        _getMergeACDCSpec_

        Get a ACDC spec for the merge task of a ReReco workload
        """
        if splittingArgs is None:
            splittingArgs = {}

        factory = ReRecoWorkloadFactory()
        rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        rerecoArgs["Requestor"] = self.user
        rerecoArgs["Group"] = self.group
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            self.workflowName, rerecoArgs)
        Tier1ReRecoWorkload.truncate(
            'ACDC_%s' % self.workflowName,
            '/%s/DataProcessing/DataProcessingMergeRECOoutput' %
            self.workflowName, self.couchUrl, self.acdcDBName)
        Tier1ReRecoWorkload.setJobSplittingParameters(
            '/ACDC_%s/DataProcessingMergeRECOoutput' % self.workflowName,
            splittingAlgo, splittingArgs)
        return Tier1ReRecoWorkload
Esempio n. 10
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
        parentProcArgs = getReRecoArgs(parent=True)
        parentProcArgs["ConfigCacheID"] = createConfig(
            parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going.
        # It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({
            'InputDataset':
            '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'
        })
        parentProcSpec = factory.factoryWorkloadConstruction(
            'testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(847, unit['Jobs'])
                self.assertEqual(1694, unit['NumberOfLumis'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [inputDataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Esempio n. 11
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
         'ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(47, len(units))
         blocks = []  # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Esempio n. 12
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
        parentProcArgs = getReRecoArgs(parent=True)
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going.
        # It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(847, unit['Jobs'])
                self.assertEqual(1694, unit['NumberOfLumis'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [inputDataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Esempio n. 13
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        Globals.GlobalParams.setNumOfRunsPerFile(3)
        Globals.GlobalParams.setNumOfLumisPerBlock(5)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList = {'206371': [[1, 50], [60,70]], '180899':[[1,1]], } )

        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        units, rejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)

        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Esempio n. 14
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
        inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
        for dummyBlock, files in maskedBlocks.iteritems():
            for dummyFile, lumiList in files.iteritems():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Esempio n. 15
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(47, len(units))
         blocks = [] # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(len(units),
                          len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Esempio n. 16
0
    def notestParentProcessing(self):
        # Does not work with a RAW dataset, need a different workload
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(47, len(units))
            for unit in units:
                import pdb
                pdb.set_trace()
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Esempio n. 17
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going. It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
        dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(64, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [dataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Esempio n. 18
0
    def testMemCoresSettings(self):
        """
        _testMemCoresSettings_

        Make sure the multicore and memory setings are properly propagated to
        all tasks and steps.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ConfigCacheID"] = recoConfig
        dataProcArguments.update({"SkimName1": "SomeSkim",
                                  "SkimInput1": "RECOoutput",
                                  "Skim1ConfigCacheID": skimConfig})
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = recoConfig

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

        # test default values
        taskPaths = {'/TestWorkload/DataProcessing': ['cmsRun1', 'stageOut1', 'logArch1'],
                     '/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim': ['cmsRun1', 'stageOut1', 'logArch1'],
                     '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged': ['cmsRun1', 'upload1', 'logArch1']}
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in taskPaths[task]:
                stepHelper = taskObj.getStepHelper(step)
                self.assertEqual(stepHelper.getNumberOfCores(), 1)
            # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
            if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
                continue
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], 2300.0)

        # now test case where args are provided
        dataProcArguments["Multicore"] = 6
        dataProcArguments["Memory"] = 4600.0
        testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)
        for task in taskPaths:
            taskObj = testWorkload.getTaskByPath(task)
            for step in taskPaths[task]:
                stepHelper = taskObj.getStepHelper(step)
                if not task.endswith('DQMHarvestMerged') and step == 'cmsRun1':
                    self.assertEqual(stepHelper.getNumberOfCores(), dataProcArguments["Multicore"])
                elif step in ('stageOut1', 'upload1', 'logArch1'):
                    self.assertEqual(stepHelper.getNumberOfCores(), 1)
                else:
                    self.assertEqual(stepHelper.getNumberOfCores(), 1, "%s should be single-core" % task)
            # FIXME: not sure whether we should set performance parameters to Harvest jobs?!?
            if task == '/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged':
                continue
            # then test Memory requirements
            perfParams = taskObj.jobSplittingParameters()['performance']
            self.assertEqual(perfParams['memoryRequirement'], dataProcArguments["Memory"])

        return
Esempio n. 19
0
 def testRunWhitelist(self):
     """ReReco lumi split with Run whitelist"""
     # get files with multiple runs
     Globals.GlobalParams.setNumOfRunsPerFile(2)
     # a large number of lumis to ensure we get multiple runs
     Globals.GlobalParams.setNumOfLumisPerBlock(10)
     splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setRunWhitelist([2, 3])
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         # Check number of jobs in element match number for
         # dataset in run whitelist
         jobs = 0
         wq_jobs = 0
         for unit in units:
             wq_jobs += unit['Jobs']
             runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset = unit['Inputs'].keys()[0])
             for run in runLumis:
                 if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                     jobs += runLumis[run]
         self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
Esempio n. 20
0
 def testRunWhitelist(self):
     """
     ReReco lumi split with Run whitelist
     This test may not do much of anything anymore since listRunLumis is not in DBS3
     """
     # get files with multiple runs
     Globals.GlobalParams.setNumOfRunsPerFile(2)
     # a large number of lumis to ensure we get multiple runs
     Globals.GlobalParams.setNumOfLumisPerBlock(10)
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setRunWhitelist([181061, 181175])
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         # Check number of jobs in element match number for
         # dataset in run whitelist
         wq_jobs = 0
         for unit in units:
             wq_jobs += unit['Jobs']
             runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
             for run in runLumis:
                 if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                     # This is what it is with DBS3 unless we calculate it
                     self.assertEqual(runLumis[run], None)
         self.assertEqual(250, int(wq_jobs))
Esempio n. 21
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going. It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [dataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Esempio n. 22
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={
            '206371': [[1, 50], [60, 70]],
            '180899': [[1, 1]],
        })

        units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload,
                                                           task)

        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Esempio n. 23
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
        inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
        for dummyBlock, files in maskedBlocks.iteritems():
            for dummyFile, lumiList in files.iteritems():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Esempio n. 24
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task)

        # invalid dbs url
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # dataset non existent
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)
Esempio n. 25
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3,
                # returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(2, int(wq_jobs))
Esempio n. 26
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]], })

        units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)

        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Esempio n. 27
0
def rerecoWorkload(workloadName, arguments, assignArgs=None):
    factory = ReRecoWorkloadFactory()
    wmspec = factory.factoryWorkloadConstruction(workloadName, arguments)
    if assignArgs:
        args = factory.getAssignTestArguments()
        args.update(assignArgs)
        wmspec.updateArguments(args)
    return wmspec
Esempio n. 28
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), Tier1ReRecoWorkload, task)
Esempio n. 29
0
 def createWMSpec(self, name='ReRecoWorkload'):
     factory = ReRecoWorkloadFactory()
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs)
     wmspec.setSpecUrl("/path/to/workload")
     wmspec.setSubscriptionInformation(custodialSites=[],
                                       nonCustodialSites=[], autoApproveSites=[],
                                       priority="Low", custodialSubType="Move")
     return wmspec
Esempio n. 30
0
 def createWMSpec(self, name = 'ReRecoWorkload'):
     factory = ReRecoWorkloadFactory()
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs)
     wmspec.setSpecUrl("/path/to/workload")
     wmspec.setSubscriptionInformation(custodialSites = [],
                                       nonCustodialSites = [], autoApproveSites = [],
                                       priority = "Low", custodialSubType = "Move")
     return wmspec
Esempio n. 31
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Esempio n. 32
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), Tier1ReRecoWorkload, task)
Esempio n. 33
0
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs), Tier1ReRecoWorkload, task, dbs,
                           {dataset + '1': []})
Esempio n. 34
0
 def testLumiSplitTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow split by Lumi"""
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(2428, unit['Jobs'])
Esempio n. 35
0
 def testLumiSplitTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow split by Lumi"""
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(2428, unit['Jobs'])
Esempio n. 36
0
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task, {dataset: []})
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs), Tier1ReRecoWorkload, task, dbs,
                           {dataset + '1': []})
Esempio n. 37
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Esempio n. 38
0
 def createReRecoSpec(self, specName, returnType="spec", splitter = None,
                      inputDataset = None, dbsUrl = None, **additionalArgs):
     # update args, then reset them
     args = ReRecoWorkloadFactory.getTestArguments()
     args.update(additionalArgs)
     args["ConfigCacheID"] = createConfig(args["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     spec =  factory.factoryWorkloadConstruction(specName, args)
     if inputDataset != None:
         spec.taskIterator().next().data.input.dataset.primary = inputDataset
     if dbsUrl != None:
         print dbsUrl
         spec.taskIterator().next().data.input.dataset.dbsurl = dbsUrl
     return self._selectReturnType(spec, returnType, splitter)
Esempio n. 39
0
    def testContinuousSplittingSupport(self):
        """Can modify successfully policies for continuous splitting"""
        policyInstance = Block(**self.splitArgs)
        self.assertTrue(policyInstance.supportsWorkAddition(),
                        "Block instance should support continuous splitting")
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, _ = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            inputs = {}
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertTrue(1 <= unit['NumberOfLumis'])
                self.assertTrue(1 <= unit['NumberOfFiles'])
                self.assertTrue(0 <= unit['NumberOfEvents'])
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

        # Modify the spec and task, get first a fresh policy instance
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition(
                {'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance,
                              Tier1ReRecoWorkload, task)

        # Run one last time
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition(
                {'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance,
                              Tier1ReRecoWorkload, task)

        return
Esempio n. 40
0
 def createReRecoSpec(self, specName, returnType="spec", splitter = None,
                      inputDataset = None, dbsUrl = None, **additionalArgs):
     # update args, then reset them
     args = ReRecoWorkloadFactory.getTestArguments()
     args.update(additionalArgs)
     args["ConfigCacheID"] = createConfig(args["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     spec =  factory.factoryWorkloadConstruction(specName, args)
     if inputDataset != None:
         spec.taskIterator().next().data.input.dataset.primary = inputDataset
     if dbsUrl != None:
         print(dbsUrl)
         spec.taskIterator().next().data.input.dataset.dbsurl = dbsUrl
     return self._selectReturnType(spec, returnType, splitter)
Esempio n. 41
0
    def createReRecoSpec(self, specName, returnType="spec", splitter = None,
                         assignKwargs={}, **additionalArgs):
        # update args, then reset them
        args = ReRecoWorkloadFactory.getTestArguments()
        args.update(additionalArgs)
        args["ConfigCacheID"] = createConfig(args["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        spec = factory.factoryWorkloadConstruction(specName, args)

        if assignKwargs:
            args = ReRecoWorkloadFactory.getAssignTestArguments()
            args.update(assignKwargs)
            spec.updateArguments(args)

        return self._selectReturnType(spec, returnType, splitter)
Esempio n. 42
0
    def getMergeACDCSpec(self, splittingAlgo = 'ParentlessMergeBySize', splittingArgs = {}):
        """
        _getMergeACDCSpec_

        Get a ACDC spec for the merge task of a ReReco workload
        """
        factory = ReRecoWorkloadFactory()
        rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(self.workflowName, rerecoArgs)
        Tier1ReRecoWorkload.truncate('ACDC_%s' % self.workflowName, '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName,
                                     self.couchUrl, self.acdcDBName)
        Tier1ReRecoWorkload.setJobSplittingParameters('/ACDC_%s/DataProcessingMergeRECOoutput' % self.workflowName,
                                                      splittingAlgo, splittingArgs)
        return Tier1ReRecoWorkload
Esempio n. 43
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)

        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
Esempio n. 44
0
def getSchema(groupName = 'PeopleLikeMe', userName = '******'):
    schema = ReRecoWorkloadFactory.getTestArguments()
    schema['RequestName'] = 'TestReReco'
    schema['RequestType'] = 'ReReco'
    schema['Requestor'] = '%s' % userName
    schema['Group'] = '%s' % groupName
    return schema
Esempio n. 45
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)

        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
Esempio n. 46
0
 def testLumiSplitTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow split by Lumi"""
     splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 2)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(4, unit['Jobs'])
Esempio n. 47
0
 def testLumiSplitTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow split by Lumi"""
     splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 2)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                                  inputDataset.processed,
                                  inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(4, unit['Jobs'])
Esempio n. 48
0
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        dummyWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)

        # Block blacklist
        lumiWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        #task.data.input.splitting.runs = ['1']
        task.data.input.splitting.runs = ['180992']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
Esempio n. 49
0
    def getProcessingACDCSpec(self, splittingAlgo = 'LumiBased', splittingArgs = {'lumis_per_job' : 8},
                              setLocationFlag = False):
        """
        _getProcessingACDCSpec_

        Get a ACDC spec for the processing task of a ReReco workload
        """
        factory = ReRecoWorkloadFactory()
        rerecoArgs = ReRecoWorkloadFactory.getTestArguments()
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(self.workflowName, rerecoArgs)
        Tier1ReRecoWorkload.truncate('ACDC_%s' % self.workflowName, '/%s/DataProcessing' % self.workflowName, self.couchUrl,
                                     self.acdcDBName)
        Tier1ReRecoWorkload.setJobSplittingParameters('/ACDC_%s/DataProcessing' % self.workflowName, splittingAlgo, splittingArgs)
        if setLocationFlag:
            Tier1ReRecoWorkload.setTrustLocationFlag()
            Tier1ReRecoWorkload.setSiteWhitelist(self.siteWhitelist)
        return Tier1ReRecoWorkload
Esempio n. 50
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations({'https://cmsweb.cern.ch/dbs/prod/global/DBSReader':Tier1ReRecoWorkload.listOutputDatasets()})
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), [])
        return
Esempio n. 51
0
    def createReRecoSpec(self,
                         specName,
                         returnType="spec",
                         splitter=None,
                         assignKwargs={},
                         **additionalArgs):
        # update args, then reset them
        args = ReRecoWorkloadFactory.getTestArguments()
        args.update(additionalArgs)
        args["ConfigCacheID"] = createConfig(args["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        spec = factory.factoryWorkloadConstruction(specName, args)

        if assignKwargs:
            args = ReRecoWorkloadFactory.getAssignTestArguments()
            args.update(assignKwargs)
            spec.updateArguments(args)

        return self._selectReturnType(spec, returnType, splitter)
Esempio n. 52
0
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        dummyWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)

        # Block blacklist
        lumiWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        #task.data.input.splitting.runs = ['1']
        task.data.input.splitting.runs = ['180992']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
Esempio n. 53
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
     dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(15, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dataset])
             self.assertEqual(4855, unit['NumberOfLumis'])
             self.assertEqual(72, unit['NumberOfFiles'])
             self.assertEqual(743201, unit['NumberOfEvents'])
Esempio n. 54
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
Esempio n. 55
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                    block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(
                            Tier1ReRecoWorkload).inputRunWhitelist():
                        self.assertEqual(
                            runLumis[run], None
                        )  # This is what it is with DBS3 unless we calculate it
            self.assertEqual(2, int(wq_jobs))
Esempio n. 56
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     splitArgs = dict(SliceType='NumberOfFiles', SliceSize=5)
     rerecoArgs = getReRecoArgs()
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
         'ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).getInputDatasetPath()
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(15, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [inputDataset])
             self.assertEqual(4855, unit['NumberOfLumis'])
             self.assertEqual(72, unit['NumberOfFiles'])
             self.assertEqual(743201, unit['NumberOfEvents'])
Esempio n. 57
0
    def atestLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        lumiWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['1']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 1)
Esempio n. 58
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations({
                'https://cmsweb.cern.ch/dbs/prod/global/DBSReader':
                Tier1ReRecoWorkload.listOutputDatasets()
            })
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), [])
        return