Example #1
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
        inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
        for dummyBlock, files in maskedBlocks.iteritems():
            for dummyFile, lumiList in files.iteritems():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Example #2
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
        inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
        for dummyBlock, files in maskedBlocks.iteritems():
            for dummyFile, lumiList in files.iteritems():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Example #3
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                        assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec, task)

        # invalid dbs url
        processingSpec = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                        assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(processingSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # dataset non existent
        processingSpec = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                        assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(processingSpec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                        assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec, task)
Example #4
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # invalid dataset name
        # This test is throwing a DBS exception but continuing on:
        """
        Message: DbsBadRequest: DBS Server Raised An Error
	ModuleName : WMCore.Services.DBS.DBSErrors
	MethodName : __init__
	ClassInstance : None
	FileName : /data/srv/wmagent_ewv/v0.9.94c/sw/slc5_amd64_gcc461/cms/wmagent/0.9.94c/lib/python2.6/site-packages/WMCore/Services/DBS/DBSErrors.py
	ClassName : None
	LineNumber : 29
	ErrorNr : 1002
        """
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
Example #5
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(),
                             len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #6
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
         'ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(47, len(units))
         blocks = []  # fill with blocks as we get work units for them
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #7
0
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist"""
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                    block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(
                            Tier1ReRecoWorkload).inputRunWhitelist():
                        jobs += runLumis[run]
            self.assertEqual(int(jobs / splitArgs['SliceSize']), int(wq_jobs))
Example #8
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={
            '206371': [[1, 50], [60, 70]],
            '180899': [[1, 1]],
        })

        units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload,
                                                           task)

        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Example #9
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     Tier1ReRecoWorkload = rerecoWorkload(
         'ReRecoWorkload',
         rerecoArgs,
         assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(47, len(units))
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(
             len(units),
             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #10
0
    def testMultiTaskProcessingWorkload(self):
        """Multi Task Processing Workflow"""
        datasets = []
        tasks, count = 0, 0
        for task in MultiTaskProcessingWorkload.taskIterator():
            tasks += 1
            inputDataset = task.inputDataset()
            datasets.append("/%s/%s/%s" %
                            (inputDataset.primary, inputDataset.processed,
                             inputDataset.tier))
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        for task in MultiTaskProcessingWorkload.taskIterator():
            units, _ = Block(**self.splitArgs)(MultiTaskProcessingWorkload,
                                               task)
            self.assertEqual(58, len(units))
            blocks = []  # fill with blocks as we get work units for them

            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(MultiTaskProcessingWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(datasets[0])))
            count += 1
        self.assertEqual(tasks, count)
Example #11
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork, badWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            self.assertEqual(len(badWork), 0)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3,
                # returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(2, int(wq_jobs))
Example #12
0
    def notestParentProcessing(self):
        # Does not work with a RAW dataset, need a different workload
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(
            parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction(
            'testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                import pdb
                pdb.set_trace()
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #13
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        Tier1ReRecoWorkload = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={
            '206371': [[1, 50], [60, 70]],
            '180899': [[1, 1]],
        })

        units, dummyRejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload,
                                                           task)

        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Example #14
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations({'http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet' :
                                                          Tier1ReRecoWorkload.listOutputDatasets()})
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), ['T2_XX_SiteA', 'T2_XX_SiteB'])
        return
Example #15
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), Tier1ReRecoWorkload, task)
Example #16
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations(
                {'https://cmsweb.cern.ch/dbs/prod/global/DBSReader': Tier1ReRecoWorkload.listInputDatasets()})
            for dataset in outputs:
                self.assertItemsEqual(outputs[dataset], ['T2_XX_SiteA', 'T2_XX_SiteB'])
        return
Example #17
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations(
                {'https://cmsweb.cern.ch/dbs/prod/global/DBSReader': Tier1ReRecoWorkload.listInputDatasets()})
            for dataset in outputs:
                self.assertItemsEqual(outputs[dataset], ['T2_XX_SiteA', 'T2_XX_SiteB'])
        return
Example #18
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations({'https://cmsweb.cern.ch/dbs/prod/global/DBSReader':Tier1ReRecoWorkload.listOutputDatasets()})
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), [])
        return
Example #19
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Example #20
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # blocks with 0 files are skipped
        # set all blocks in request to 0 files, no work should be found & an error is raised
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
        Globals.GlobalParams.resetParams()
Example #21
0
    def testDatasetLocation(self):
        """
        _testDatasetLocation_

        This is a function of all start policies so only test it here
        as there is no StartPolicyInterface unit test
        """
        policyInstance = Block(**self.splitArgs)
        # The policy instance must be called first to initialize the values
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance(Tier1ReRecoWorkload, task)
            outputs = policyInstance.getDatasetLocations({
                'https://cmsweb.cern.ch/dbs/prod/global/DBSReader':
                Tier1ReRecoWorkload.listOutputDatasets()
            })
            for dataset in outputs:
                self.assertEqual(sorted(outputs[dataset]), [])
        return
Example #22
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Example #23
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        Globals.GlobalParams.setNumOfFilesPerBlock(0)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Example #24
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # dataset non existent
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction(
            'testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
Example #25
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)

        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork, badWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
            self.assertEqual(0, len(badWork))
Example #26
0
    def testPileupData(self):
        """
        _testPileupData_

        Check that every workqueue element split contains the pile up data
        if it is present in the workload.
        """
        for task in MultiTaskProcessingWorkload.taskIterator():
            units, _, _ = Block(**self.splitArgs)(MultiTaskProcessingWorkload, task)
            self.assertEqual(58, len(units))
            for unit in units:
                pileupData = unit["PileupData"]
                self.assertEqual(len(pileupData), 1)
                self.assertItemsEqual(pileupData.values()[0], ['T2_XX_SiteA', 'T2_XX_SiteB', 'T2_XX_SiteC'])
        return
Example #27
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(2, len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
Example #28
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)

        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
Example #29
0
    def testContinuousSplittingSupport(self):
        """Can modify successfully policies for continuous splitting"""
        policyInstance = Block(**self.splitArgs)
        self.assertTrue(policyInstance.supportsWorkAddition(), "Block instance should support continuous splitting")
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, _ = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            inputs = {}
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertTrue(1 <= unit['NumberOfLumis'])
                self.assertTrue(1 <= unit['NumberOfFiles'])
                self.assertTrue(0 <= unit['NumberOfEvents'])
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

        # Modify the spec and task, get first a fresh policy instance
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        # Run one last time
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        return
Example #30
0
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Block blacklist
        lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['1']
        task.data.input.splitting.lumis = ['1,1']
        units = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
Example #31
0
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        dummyWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)

        # Block blacklist
        lumiWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        #task.data.input.splitting.runs = ['1']
        task.data.input.splitting.runs = ['180992']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
Example #32
0
    def testLumiMask(self):
        """Lumi mask test"""
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        dummyWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2,
                                       assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})

        # Block blacklist
        lumiWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs2,
                                      assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        task = getFirstTask(lumiWorkload)
        # task.data.input.splitting.runs = ['1']
        task.data.input.splitting.runs = ['180992']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork, badWork = Block(**self.splitArgs)(lumiWorkload, task)
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
        self.assertEqual(len(badWork), 0)
Example #33
0
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                          assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                              inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         # Take dataset and force to run over only 1 block
         units, _, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task,
                                               {dataset + '#28315b28-0c5c-11e1-b764-003048caaace': []})
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(1, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
         self.assertNotEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Example #34
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow"""
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
            self.assertEqual(0, len(rejectedWork))
Example #35
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                    block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(
                            Tier1ReRecoWorkload).inputRunWhitelist():
                        self.assertEqual(
                            runLumis[run], None
                        )  # This is what it is with DBS3 unless we calculate it
            self.assertEqual(2, int(wq_jobs))
Example #36
0
    def testContinuousSplittingSupport(self):
        """Can modify successfully policies for continuous splitting"""
        policyInstance = Block(**self.splitArgs)
        self.assertTrue(policyInstance.supportsWorkAddition(), "Block instance should support continuous splitting")
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, _ = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
            blocks = [] # fill with blocks as we get work units for them
            inputs = {}
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(4, unit['NumberOfLumis'])
                self.assertEqual(10, unit['NumberOfFiles'])
                self.assertEqual(10000, unit['NumberOfEvents'])
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

        # Modify the spec and task, get first a fresh policy instance
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        # Pop up 2 more blocks for the dataset with different statistics
        Globals.GlobalParams.setNumOfBlocksPerDataset(Globals.GlobalParams.numOfBlocksPerDataset() + 2)
        Globals.GlobalParams.setNumOfFilesPerBlock(10) # Emulator is crooked, it gives the sum of all the files in the dataset not block


        # Now run another pass of the Block policy
        policyInstance = Block(**self.splitArgs)
        policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(2, len(units))
            self.assertEqual(0, len(rejectedWork))
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                self.assertEqual(69, unit['Priority'])
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(8, unit['NumberOfLumis'])
                self.assertEqual(40, unit['NumberOfFiles'])
                self.assertEqual(40000, unit['NumberOfEvents'])
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)) - 2)

        # Run one last time
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition({'ProcessedInputs' : inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance, Tier1ReRecoWorkload, task)

        return